# flake8: noqa
import torch


# Module-level name bound to IEEE-754 positive infinity.
# NOTE(review): not referenced in the visible portion of this file; presumably
# kept so generated graph code can use the bare name `inf` — confirm before
# removing.
inf = float("inf")


def forward(
    self,
    arg0_1: "f32[][]cuda:0",
    arg1_1: "f32[50][1]cuda:0",
    arg2_1: "f32[23][1]cuda:0",
    arg3_1: "f32[38][1]cuda:0",
    arg4_1: "f32[5][1]cuda:0",
    arg5_1: "f32[100][1]cuda:0",
    arg6_1: "f32[50][1]cuda:0",
    arg7_1: "f32[77][1]cuda:0",
    arg8_1: "f32[100][1]cuda:0",
    arg9_1: "f32[100][1]cuda:0",
    arg10_1: "f32[96][1]cuda:0",
    arg11_1: "f32[78][1]cuda:0",
    arg12_1: "f32[100][1]cuda:0",
    arg13_1: "f32[100][1]cuda:0",
    arg14_1: "f32[97][1]cuda:0",
    arg15_1: "f32[819, 732][732, 1]cuda:0",
    arg16_1: "f32[204][1]cuda:0",
    arg17_1: "f32[64][1]cuda:0",
    arg18_1: "f32[204][1]cuda:0",
    arg19_1: "f32[64, 204][204, 1]cuda:0",
    arg20_1: "f32[204][1]cuda:0",
    arg21_1: "f32[204, 160][160, 1]cuda:0",
    arg22_1: "f32[204][1]cuda:0",
    arg23_1: "f32[64][1]cuda:0",
    arg24_1: "f32[204][1]cuda:0",
    arg25_1: "f32[64, 204][204, 1]cuda:0",
    arg26_1: "f32[204][1]cuda:0",
    arg27_1: "f32[204][1]cuda:0",
    arg28_1: "f32[64][1]cuda:0",
    arg29_1: "f32[204][1]cuda:0",
    arg30_1: "f32[64, 204][204, 1]cuda:0",
    arg31_1: "f32[204][1]cuda:0",
    arg32_1: "f32[204, 72][72, 1]cuda:0",
    arg33_1: "f32[204][1]cuda:0",
    arg34_1: "f32[64][1]cuda:0",
    arg35_1: "f32[64, 204][204, 1]cuda:0",
    arg36_1: "f32[768, 2675][2675, 1]cuda:0",
    arg37_1: "f32[768, 2048][2048, 1]cuda:0",
    arg38_1: "f32[768][1]cuda:0",
    arg39_1: "f32[4096][1]cuda:0",
    arg40_1: "f32[4096, 256][256, 1]cuda:0",
    arg41_1: "f32[64][1]cuda:0",
    arg42_1: "f32[2675][1]cuda:0",
    arg43_1: "f32[1536, 4096][4096, 1]cuda:0",
    arg44_1: "f32[4096][1]cuda:0",
    arg45_1: "f32[1840][1]cuda:0",
    arg46_1: "f32[2048, 2675][2675, 1]cuda:0",
    arg47_1: "f32[2048][1]cuda:0",
    arg48_1: "f32[2048][1]cuda:0",
    arg49_1: "f32[768][1]cuda:0",
    arg50_1: "f32[256][1]cuda:0",
    arg51_1: "f32[768, 2048][2048, 1]cuda:0",
    arg52_1: "f32[4096][1]cuda:0",
    arg53_1: "f32[104][1]cuda:0",
    arg54_1: "f32[768][1]cuda:0",
    arg55_1: "f32[1024][1]cuda:0",
    arg56_1: "f32[2048][1]cuda:0",
    arg57_1: "f32[768, 2675][2675, 1]cuda:0",
    arg58_1: "f32[2675][1]cuda:0",
    arg59_1: "f32[256][1]cuda:0",
    arg60_1: "f32[768][1]cuda:0",
    arg61_1: "f32[256, 768][768, 1]cuda:0",
    arg62_1: "f32[64][1]cuda:0",
    arg63_1: "f32[1536][1]cuda:0",
    arg64_1: "f32[2048][1]cuda:0",
    arg65_1: "f32[3360][1]cuda:0",
    arg66_1: "f32[768][1]cuda:0",
    arg67_1: "f32[768, 2048][2048, 1]cuda:0",
    arg68_1: "f32[256][1]cuda:0",
    arg69_1: "f32[104, 256][256, 1]cuda:0",
    arg70_1: "f32[2675][1]cuda:0",
    arg71_1: "f32[768][1]cuda:0",
    arg72_1: "f32[2048][1]cuda:0",
    arg73_1: "f32[1024][1]cuda:0",
    arg74_1: "f32[64, 612][612, 1]cuda:0",
    arg75_1: "f32[128][1]cuda:0",
    arg76_1: "f32[308, 256][256, 1]cuda:0",
    arg77_1: "f32[1][1]cuda:0",
    arg78_1: "f32[512][1]cuda:0",
    arg79_1: "f32[512][1]cuda:0",
    arg80_1: "f32[50][1]cuda:0",
    arg81_1: "f32[23][1]cuda:0",
    arg82_1: "f32[38][1]cuda:0",
    arg83_1: "f32[5][1]cuda:0",
    arg84_1: "f32[100][1]cuda:0",
    arg85_1: "f32[50][1]cuda:0",
    arg86_1: "f32[77][1]cuda:0",
    arg87_1: "f32[100][1]cuda:0",
    arg88_1: "f32[100][1]cuda:0",
    arg89_1: "f32[96][1]cuda:0",
    arg90_1: "f32[78][1]cuda:0",
    arg91_1: "f32[100][1]cuda:0",
    arg92_1: "f32[100][1]cuda:0",
    arg93_1: "f32[97][1]cuda:0",
    arg94_1: "f32[819, 732][732, 1]cuda:0",
    arg95_1: "f32[204][1]cuda:0",
    arg96_1: "f32[64][1]cuda:0",
    arg97_1: "f32[204][1]cuda:0",
    arg98_1: "f32[64, 204][204, 1]cuda:0",
    arg99_1: "f32[204][1]cuda:0",
    arg100_1: "f32[204, 160][160, 1]cuda:0",
    arg101_1: "f32[204][1]cuda:0",
    arg102_1: "f32[64][1]cuda:0",
    arg103_1: "f32[204][1]cuda:0",
    arg104_1: "f32[64, 204][204, 1]cuda:0",
    arg105_1: "f32[204][1]cuda:0",
    arg106_1: "f32[204][1]cuda:0",
    arg107_1: "f32[64][1]cuda:0",
    arg108_1: "f32[204][1]cuda:0",
    arg109_1: "f32[64, 204][204, 1]cuda:0",
    arg110_1: "f32[204][1]cuda:0",
    arg111_1: "f32[204, 72][72, 1]cuda:0",
    arg112_1: "f32[204][1]cuda:0",
    arg113_1: "f32[64][1]cuda:0",
    arg114_1: "f32[64, 204][204, 1]cuda:0",
    arg115_1: "f32[768, 2675][2675, 1]cuda:0",
    arg116_1: "f32[768, 2048][2048, 1]cuda:0",
    arg117_1: "f32[768][1]cuda:0",
    arg118_1: "f32[4096][1]cuda:0",
    arg119_1: "f32[4096, 256][256, 1]cuda:0",
    arg120_1: "f32[64][1]cuda:0",
    arg121_1: "f32[2675][1]cuda:0",
    arg122_1: "f32[1536, 4096][22320, 1]cuda:0",
    arg123_1: "f32[4096][1]cuda:0",
    arg124_1: "f32[1840][1]cuda:0",
    arg125_1: "f32[2048, 2675][2675, 1]cuda:0",
    arg126_1: "f32[2048][1]cuda:0",
    arg127_1: "f32[2048][1]cuda:0",
    arg128_1: "f32[768][1]cuda:0",
    arg129_1: "f32[256][1]cuda:0",
    arg130_1: "f32[768, 2048][2048, 1]cuda:0",
    arg131_1: "f32[4096][1]cuda:0",
    arg132_1: "f32[104][1]cuda:0",
    arg133_1: "f32[768][1]cuda:0",
    arg134_1: "f32[1024][1]cuda:0",
    arg135_1: "f32[2048][1]cuda:0",
    arg136_1: "f32[768, 2675][2675, 1]cuda:0",
    arg137_1: "f32[2675][1]cuda:0",
    arg138_1: "f32[256][1]cuda:0",
    arg139_1: "f32[768][1]cuda:0",
    arg140_1: "f32[256, 768][768, 1]cuda:0",
    arg141_1: "f32[64][1]cuda:0",
    arg142_1: "f32[1536][1]cuda:0",
    arg143_1: "f32[2048][1]cuda:0",
    arg144_1: "f32[3360][1]cuda:0",
    arg145_1: "f32[768][1]cuda:0",
    arg146_1: "f32[768, 2048][2048, 1]cuda:0",
    arg147_1: "f32[256][1]cuda:0",
    arg148_1: "f32[104, 256][256, 1]cuda:0",
    arg149_1: "f32[2675][1]cuda:0",
    arg150_1: "f32[768][1]cuda:0",
    arg151_1: "f32[2048][1]cuda:0",
    arg152_1: "f32[1024][1]cuda:0",
    arg153_1: "f32[64, 612][612, 1]cuda:0",
    arg154_1: "f32[128][1]cuda:0",
    arg155_1: "f32[308, 256][256, 1]cuda:0",
    arg156_1: "f32[1][1]cuda:0",
    arg157_1: "f32[512][1]cuda:0",
    arg158_1: "f32[512][1]cuda:0",
):
    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:328 in torch_dynamo_resume_in__per_group_step_impl_at_316, code: -lr,
    neg: "f32[][]cuda:0" = torch.ops.aten.neg.default(arg0_1)
    arg0_1 = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:231 in _compute_clippy_shrinkage, code: masked_blocked_nom = torch._foreach_mul(
    _foreach_mul = torch.ops.aten._foreach_mul.Tensor(
        [
            arg1_1,
            arg2_1,
            arg3_1,
            arg4_1,
            arg5_1,
            arg6_1,
            arg7_1,
            arg8_1,
            arg9_1,
            arg10_1,
            arg11_1,
            arg12_1,
            arg13_1,
            arg14_1,
            arg15_1,
            arg16_1,
            arg17_1,
            arg18_1,
            arg19_1,
            arg20_1,
            arg21_1,
            arg22_1,
            arg23_1,
            arg24_1,
            arg25_1,
            arg26_1,
            arg27_1,
            arg28_1,
            arg29_1,
            arg30_1,
            arg31_1,
            arg32_1,
            arg33_1,
            arg34_1,
            arg35_1,
            arg36_1,
            arg37_1,
            arg38_1,
            arg39_1,
            arg40_1,
            arg41_1,
            arg42_1,
            arg43_1,
            arg44_1,
            arg45_1,
            arg46_1,
            arg47_1,
            arg48_1,
            arg49_1,
            arg50_1,
            arg51_1,
            arg52_1,
            arg53_1,
            arg54_1,
            arg55_1,
            arg56_1,
            arg57_1,
            arg58_1,
            arg59_1,
            arg60_1,
            arg61_1,
            arg62_1,
            arg63_1,
            arg64_1,
            arg65_1,
            arg66_1,
            arg67_1,
            arg68_1,
            arg69_1,
            arg70_1,
            arg71_1,
            arg72_1,
            arg73_1,
            arg74_1,
            arg75_1,
            arg76_1,
            arg77_1,
            arg78_1,
            arg79_1,
        ],
        neg,
    )
    getitem: "f32[50][1]cuda:0" = _foreach_mul[0]
    getitem_1: "f32[23][1]cuda:0" = _foreach_mul[1]
    getitem_2: "f32[38][1]cuda:0" = _foreach_mul[2]
    getitem_3: "f32[5][1]cuda:0" = _foreach_mul[3]
    getitem_4: "f32[100][1]cuda:0" = _foreach_mul[4]
    getitem_5: "f32[50][1]cuda:0" = _foreach_mul[5]
    getitem_6: "f32[77][1]cuda:0" = _foreach_mul[6]
    getitem_7: "f32[100][1]cuda:0" = _foreach_mul[7]
    getitem_8: "f32[100][1]cuda:0" = _foreach_mul[8]
    getitem_9: "f32[96][1]cuda:0" = _foreach_mul[9]
    getitem_10: "f32[78][1]cuda:0" = _foreach_mul[10]
    getitem_11: "f32[100][1]cuda:0" = _foreach_mul[11]
    getitem_12: "f32[100][1]cuda:0" = _foreach_mul[12]
    getitem_13: "f32[97][1]cuda:0" = _foreach_mul[13]
    getitem_14: "f32[819, 732][732, 1]cuda:0" = _foreach_mul[14]
    getitem_15: "f32[204][1]cuda:0" = _foreach_mul[15]
    getitem_16: "f32[64][1]cuda:0" = _foreach_mul[16]
    getitem_17: "f32[204][1]cuda:0" = _foreach_mul[17]
    getitem_18: "f32[64, 204][204, 1]cuda:0" = _foreach_mul[18]
    getitem_19: "f32[204][1]cuda:0" = _foreach_mul[19]
    getitem_20: "f32[204, 160][160, 1]cuda:0" = _foreach_mul[20]
    getitem_21: "f32[204][1]cuda:0" = _foreach_mul[21]
    getitem_22: "f32[64][1]cuda:0" = _foreach_mul[22]
    getitem_23: "f32[204][1]cuda:0" = _foreach_mul[23]
    getitem_24: "f32[64, 204][204, 1]cuda:0" = _foreach_mul[24]
    getitem_25: "f32[204][1]cuda:0" = _foreach_mul[25]
    getitem_26: "f32[204][1]cuda:0" = _foreach_mul[26]
    getitem_27: "f32[64][1]cuda:0" = _foreach_mul[27]
    getitem_28: "f32[204][1]cuda:0" = _foreach_mul[28]
    getitem_29: "f32[64, 204][204, 1]cuda:0" = _foreach_mul[29]
    getitem_30: "f32[204][1]cuda:0" = _foreach_mul[30]
    getitem_31: "f32[204, 72][72, 1]cuda:0" = _foreach_mul[31]
    getitem_32: "f32[204][1]cuda:0" = _foreach_mul[32]
    getitem_33: "f32[64][1]cuda:0" = _foreach_mul[33]
    getitem_34: "f32[64, 204][204, 1]cuda:0" = _foreach_mul[34]
    getitem_35: "f32[768, 2675][2675, 1]cuda:0" = _foreach_mul[35]
    getitem_36: "f32[768, 2048][2048, 1]cuda:0" = _foreach_mul[36]
    getitem_37: "f32[768][1]cuda:0" = _foreach_mul[37]
    getitem_38: "f32[4096][1]cuda:0" = _foreach_mul[38]
    getitem_39: "f32[4096, 256][256, 1]cuda:0" = _foreach_mul[39]
    getitem_40: "f32[64][1]cuda:0" = _foreach_mul[40]
    getitem_41: "f32[2675][1]cuda:0" = _foreach_mul[41]
    getitem_42: "f32[1536, 4096][4096, 1]cuda:0" = _foreach_mul[42]
    getitem_43: "f32[4096][1]cuda:0" = _foreach_mul[43]
    getitem_44: "f32[1840][1]cuda:0" = _foreach_mul[44]
    getitem_45: "f32[2048, 2675][2675, 1]cuda:0" = _foreach_mul[45]
    getitem_46: "f32[2048][1]cuda:0" = _foreach_mul[46]
    getitem_47: "f32[2048][1]cuda:0" = _foreach_mul[47]
    getitem_48: "f32[768][1]cuda:0" = _foreach_mul[48]
    getitem_49: "f32[256][1]cuda:0" = _foreach_mul[49]
    getitem_50: "f32[768, 2048][2048, 1]cuda:0" = _foreach_mul[50]
    getitem_51: "f32[4096][1]cuda:0" = _foreach_mul[51]
    getitem_52: "f32[104][1]cuda:0" = _foreach_mul[52]
    getitem_53: "f32[768][1]cuda:0" = _foreach_mul[53]
    getitem_54: "f32[1024][1]cuda:0" = _foreach_mul[54]
    getitem_55: "f32[2048][1]cuda:0" = _foreach_mul[55]
    getitem_56: "f32[768, 2675][2675, 1]cuda:0" = _foreach_mul[56]
    getitem_57: "f32[2675][1]cuda:0" = _foreach_mul[57]
    getitem_58: "f32[256][1]cuda:0" = _foreach_mul[58]
    getitem_59: "f32[768][1]cuda:0" = _foreach_mul[59]
    getitem_60: "f32[256, 768][768, 1]cuda:0" = _foreach_mul[60]
    getitem_61: "f32[64][1]cuda:0" = _foreach_mul[61]
    getitem_62: "f32[1536][1]cuda:0" = _foreach_mul[62]
    getitem_63: "f32[2048][1]cuda:0" = _foreach_mul[63]
    getitem_64: "f32[3360][1]cuda:0" = _foreach_mul[64]
    getitem_65: "f32[768][1]cuda:0" = _foreach_mul[65]
    getitem_66: "f32[768, 2048][2048, 1]cuda:0" = _foreach_mul[66]
    getitem_67: "f32[256][1]cuda:0" = _foreach_mul[67]
    getitem_68: "f32[104, 256][256, 1]cuda:0" = _foreach_mul[68]
    getitem_69: "f32[2675][1]cuda:0" = _foreach_mul[69]
    getitem_70: "f32[768][1]cuda:0" = _foreach_mul[70]
    getitem_71: "f32[2048][1]cuda:0" = _foreach_mul[71]
    getitem_72: "f32[1024][1]cuda:0" = _foreach_mul[72]
    getitem_73: "f32[64, 612][612, 1]cuda:0" = _foreach_mul[73]
    getitem_74: "f32[128][1]cuda:0" = _foreach_mul[74]
    getitem_75: "f32[308, 256][256, 1]cuda:0" = _foreach_mul[75]
    getitem_76: "f32[1][1]cuda:0" = _foreach_mul[76]
    getitem_77: "f32[512][1]cuda:0" = _foreach_mul[77]
    getitem_78: "f32[512][1]cuda:0" = _foreach_mul[78]
    _foreach_mul = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:234 in _compute_clippy_shrinkage, code: masked_blocked_denom = torch._foreach_abs(masked_blocked_params)
    _foreach_abs = torch.ops.aten._foreach_abs.default(
        [
            arg80_1,
            arg81_1,
            arg82_1,
            arg83_1,
            arg84_1,
            arg85_1,
            arg86_1,
            arg87_1,
            arg88_1,
            arg89_1,
            arg90_1,
            arg91_1,
            arg92_1,
            arg93_1,
            arg94_1,
            arg95_1,
            arg96_1,
            arg97_1,
            arg98_1,
            arg99_1,
            arg100_1,
            arg101_1,
            arg102_1,
            arg103_1,
            arg104_1,
            arg105_1,
            arg106_1,
            arg107_1,
            arg108_1,
            arg109_1,
            arg110_1,
            arg111_1,
            arg112_1,
            arg113_1,
            arg114_1,
            arg115_1,
            arg116_1,
            arg117_1,
            arg118_1,
            arg119_1,
            arg120_1,
            arg121_1,
            arg122_1,
            arg123_1,
            arg124_1,
            arg125_1,
            arg126_1,
            arg127_1,
            arg128_1,
            arg129_1,
            arg130_1,
            arg131_1,
            arg132_1,
            arg133_1,
            arg134_1,
            arg135_1,
            arg136_1,
            arg137_1,
            arg138_1,
            arg139_1,
            arg140_1,
            arg141_1,
            arg142_1,
            arg143_1,
            arg144_1,
            arg145_1,
            arg146_1,
            arg147_1,
            arg148_1,
            arg149_1,
            arg150_1,
            arg151_1,
            arg152_1,
            arg153_1,
            arg154_1,
            arg155_1,
            arg156_1,
            arg157_1,
            arg158_1,
        ]
    )
    getitem_79: "f32[50][1]cuda:0" = _foreach_abs[0]
    getitem_80: "f32[23][1]cuda:0" = _foreach_abs[1]
    getitem_81: "f32[38][1]cuda:0" = _foreach_abs[2]
    getitem_82: "f32[5][1]cuda:0" = _foreach_abs[3]
    getitem_83: "f32[100][1]cuda:0" = _foreach_abs[4]
    getitem_84: "f32[50][1]cuda:0" = _foreach_abs[5]
    getitem_85: "f32[77][1]cuda:0" = _foreach_abs[6]
    getitem_86: "f32[100][1]cuda:0" = _foreach_abs[7]
    getitem_87: "f32[100][1]cuda:0" = _foreach_abs[8]
    getitem_88: "f32[96][1]cuda:0" = _foreach_abs[9]
    getitem_89: "f32[78][1]cuda:0" = _foreach_abs[10]
    getitem_90: "f32[100][1]cuda:0" = _foreach_abs[11]
    getitem_91: "f32[100][1]cuda:0" = _foreach_abs[12]
    getitem_92: "f32[97][1]cuda:0" = _foreach_abs[13]
    getitem_93: "f32[819, 732][732, 1]cuda:0" = _foreach_abs[14]
    getitem_94: "f32[204][1]cuda:0" = _foreach_abs[15]
    getitem_95: "f32[64][1]cuda:0" = _foreach_abs[16]
    getitem_96: "f32[204][1]cuda:0" = _foreach_abs[17]
    getitem_97: "f32[64, 204][204, 1]cuda:0" = _foreach_abs[18]
    getitem_98: "f32[204][1]cuda:0" = _foreach_abs[19]
    getitem_99: "f32[204, 160][160, 1]cuda:0" = _foreach_abs[20]
    getitem_100: "f32[204][1]cuda:0" = _foreach_abs[21]
    getitem_101: "f32[64][1]cuda:0" = _foreach_abs[22]
    getitem_102: "f32[204][1]cuda:0" = _foreach_abs[23]
    getitem_103: "f32[64, 204][204, 1]cuda:0" = _foreach_abs[24]
    getitem_104: "f32[204][1]cuda:0" = _foreach_abs[25]
    getitem_105: "f32[204][1]cuda:0" = _foreach_abs[26]
    getitem_106: "f32[64][1]cuda:0" = _foreach_abs[27]
    getitem_107: "f32[204][1]cuda:0" = _foreach_abs[28]
    getitem_108: "f32[64, 204][204, 1]cuda:0" = _foreach_abs[29]
    getitem_109: "f32[204][1]cuda:0" = _foreach_abs[30]
    getitem_110: "f32[204, 72][72, 1]cuda:0" = _foreach_abs[31]
    getitem_111: "f32[204][1]cuda:0" = _foreach_abs[32]
    getitem_112: "f32[64][1]cuda:0" = _foreach_abs[33]
    getitem_113: "f32[64, 204][204, 1]cuda:0" = _foreach_abs[34]
    getitem_114: "f32[768, 2675][2675, 1]cuda:0" = _foreach_abs[35]
    getitem_115: "f32[768, 2048][2048, 1]cuda:0" = _foreach_abs[36]
    getitem_116: "f32[768][1]cuda:0" = _foreach_abs[37]
    getitem_117: "f32[4096][1]cuda:0" = _foreach_abs[38]
    getitem_118: "f32[4096, 256][256, 1]cuda:0" = _foreach_abs[39]
    getitem_119: "f32[64][1]cuda:0" = _foreach_abs[40]
    getitem_120: "f32[2675][1]cuda:0" = _foreach_abs[41]
    getitem_121: "f32[1536, 4096][4096, 1]cuda:0" = _foreach_abs[42]
    getitem_122: "f32[4096][1]cuda:0" = _foreach_abs[43]
    getitem_123: "f32[1840][1]cuda:0" = _foreach_abs[44]
    getitem_124: "f32[2048, 2675][2675, 1]cuda:0" = _foreach_abs[45]
    getitem_125: "f32[2048][1]cuda:0" = _foreach_abs[46]
    getitem_126: "f32[2048][1]cuda:0" = _foreach_abs[47]
    getitem_127: "f32[768][1]cuda:0" = _foreach_abs[48]
    getitem_128: "f32[256][1]cuda:0" = _foreach_abs[49]
    getitem_129: "f32[768, 2048][2048, 1]cuda:0" = _foreach_abs[50]
    getitem_130: "f32[4096][1]cuda:0" = _foreach_abs[51]
    getitem_131: "f32[104][1]cuda:0" = _foreach_abs[52]
    getitem_132: "f32[768][1]cuda:0" = _foreach_abs[53]
    getitem_133: "f32[1024][1]cuda:0" = _foreach_abs[54]
    getitem_134: "f32[2048][1]cuda:0" = _foreach_abs[55]
    getitem_135: "f32[768, 2675][2675, 1]cuda:0" = _foreach_abs[56]
    getitem_136: "f32[2675][1]cuda:0" = _foreach_abs[57]
    getitem_137: "f32[256][1]cuda:0" = _foreach_abs[58]
    getitem_138: "f32[768][1]cuda:0" = _foreach_abs[59]
    getitem_139: "f32[256, 768][768, 1]cuda:0" = _foreach_abs[60]
    getitem_140: "f32[64][1]cuda:0" = _foreach_abs[61]
    getitem_141: "f32[1536][1]cuda:0" = _foreach_abs[62]
    getitem_142: "f32[2048][1]cuda:0" = _foreach_abs[63]
    getitem_143: "f32[3360][1]cuda:0" = _foreach_abs[64]
    getitem_144: "f32[768][1]cuda:0" = _foreach_abs[65]
    getitem_145: "f32[768, 2048][2048, 1]cuda:0" = _foreach_abs[66]
    getitem_146: "f32[256][1]cuda:0" = _foreach_abs[67]
    getitem_147: "f32[104, 256][256, 1]cuda:0" = _foreach_abs[68]
    getitem_148: "f32[2675][1]cuda:0" = _foreach_abs[69]
    getitem_149: "f32[768][1]cuda:0" = _foreach_abs[70]
    getitem_150: "f32[2048][1]cuda:0" = _foreach_abs[71]
    getitem_151: "f32[1024][1]cuda:0" = _foreach_abs[72]
    getitem_152: "f32[64, 612][612, 1]cuda:0" = _foreach_abs[73]
    getitem_153: "f32[128][1]cuda:0" = _foreach_abs[74]
    getitem_154: "f32[308, 256][256, 1]cuda:0" = _foreach_abs[75]
    getitem_155: "f32[1][1]cuda:0" = _foreach_abs[76]
    getitem_156: "f32[512][1]cuda:0" = _foreach_abs[77]
    getitem_157: "f32[512][1]cuda:0" = _foreach_abs[78]
    _foreach_abs = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:235 in _compute_clippy_shrinkage, code: torch._foreach_mul_(masked_blocked_denom, self._gamma1)
    _foreach_mul_1 = torch.ops.aten._foreach_mul.Scalar(
        [
            getitem_79,
            getitem_80,
            getitem_81,
            getitem_82,
            getitem_83,
            getitem_84,
            getitem_85,
            getitem_86,
            getitem_87,
            getitem_88,
            getitem_89,
            getitem_90,
            getitem_91,
            getitem_92,
            getitem_93,
            getitem_94,
            getitem_95,
            getitem_96,
            getitem_97,
            getitem_98,
            getitem_99,
            getitem_100,
            getitem_101,
            getitem_102,
            getitem_103,
            getitem_104,
            getitem_105,
            getitem_106,
            getitem_107,
            getitem_108,
            getitem_109,
            getitem_110,
            getitem_111,
            getitem_112,
            getitem_113,
            getitem_114,
            getitem_115,
            getitem_116,
            getitem_117,
            getitem_118,
            getitem_119,
            getitem_120,
            getitem_121,
            getitem_122,
            getitem_123,
            getitem_124,
            getitem_125,
            getitem_126,
            getitem_127,
            getitem_128,
            getitem_129,
            getitem_130,
            getitem_131,
            getitem_132,
            getitem_133,
            getitem_134,
            getitem_135,
            getitem_136,
            getitem_137,
            getitem_138,
            getitem_139,
            getitem_140,
            getitem_141,
            getitem_142,
            getitem_143,
            getitem_144,
            getitem_145,
            getitem_146,
            getitem_147,
            getitem_148,
            getitem_149,
            getitem_150,
            getitem_151,
            getitem_152,
            getitem_153,
            getitem_154,
            getitem_155,
            getitem_156,
            getitem_157,
        ],
        0.5,
    )
    getitem_79 = (
        getitem_80
    ) = (
        getitem_81
    ) = (
        getitem_82
    ) = (
        getitem_83
    ) = (
        getitem_84
    ) = (
        getitem_85
    ) = (
        getitem_86
    ) = (
        getitem_87
    ) = (
        getitem_88
    ) = (
        getitem_89
    ) = (
        getitem_90
    ) = (
        getitem_91
    ) = (
        getitem_92
    ) = (
        getitem_93
    ) = (
        getitem_94
    ) = (
        getitem_95
    ) = (
        getitem_96
    ) = (
        getitem_97
    ) = (
        getitem_98
    ) = (
        getitem_99
    ) = (
        getitem_100
    ) = (
        getitem_101
    ) = (
        getitem_102
    ) = (
        getitem_103
    ) = (
        getitem_104
    ) = (
        getitem_105
    ) = (
        getitem_106
    ) = (
        getitem_107
    ) = (
        getitem_108
    ) = (
        getitem_109
    ) = (
        getitem_110
    ) = (
        getitem_111
    ) = (
        getitem_112
    ) = (
        getitem_113
    ) = (
        getitem_114
    ) = (
        getitem_115
    ) = (
        getitem_116
    ) = (
        getitem_117
    ) = (
        getitem_118
    ) = (
        getitem_119
    ) = (
        getitem_120
    ) = (
        getitem_121
    ) = (
        getitem_122
    ) = (
        getitem_123
    ) = (
        getitem_124
    ) = (
        getitem_125
    ) = (
        getitem_126
    ) = (
        getitem_127
    ) = (
        getitem_128
    ) = (
        getitem_129
    ) = (
        getitem_130
    ) = (
        getitem_131
    ) = (
        getitem_132
    ) = (
        getitem_133
    ) = (
        getitem_134
    ) = (
        getitem_135
    ) = (
        getitem_136
    ) = (
        getitem_137
    ) = (
        getitem_138
    ) = (
        getitem_139
    ) = (
        getitem_140
    ) = (
        getitem_141
    ) = (
        getitem_142
    ) = (
        getitem_143
    ) = (
        getitem_144
    ) = (
        getitem_145
    ) = (
        getitem_146
    ) = (
        getitem_147
    ) = (
        getitem_148
    ) = (
        getitem_149
    ) = (
        getitem_150
    ) = (
        getitem_151
    ) = (
        getitem_152
    ) = getitem_153 = getitem_154 = getitem_155 = getitem_156 = getitem_157 = None
    getitem_158: "f32[50][1]cuda:0" = _foreach_mul_1[0]
    getitem_159: "f32[23][1]cuda:0" = _foreach_mul_1[1]
    getitem_160: "f32[38][1]cuda:0" = _foreach_mul_1[2]
    getitem_161: "f32[5][1]cuda:0" = _foreach_mul_1[3]
    getitem_162: "f32[100][1]cuda:0" = _foreach_mul_1[4]
    getitem_163: "f32[50][1]cuda:0" = _foreach_mul_1[5]
    getitem_164: "f32[77][1]cuda:0" = _foreach_mul_1[6]
    getitem_165: "f32[100][1]cuda:0" = _foreach_mul_1[7]
    getitem_166: "f32[100][1]cuda:0" = _foreach_mul_1[8]
    getitem_167: "f32[96][1]cuda:0" = _foreach_mul_1[9]
    getitem_168: "f32[78][1]cuda:0" = _foreach_mul_1[10]
    getitem_169: "f32[100][1]cuda:0" = _foreach_mul_1[11]
    getitem_170: "f32[100][1]cuda:0" = _foreach_mul_1[12]
    getitem_171: "f32[97][1]cuda:0" = _foreach_mul_1[13]
    getitem_172: "f32[819, 732][732, 1]cuda:0" = _foreach_mul_1[14]
    getitem_173: "f32[204][1]cuda:0" = _foreach_mul_1[15]
    getitem_174: "f32[64][1]cuda:0" = _foreach_mul_1[16]
    getitem_175: "f32[204][1]cuda:0" = _foreach_mul_1[17]
    getitem_176: "f32[64, 204][204, 1]cuda:0" = _foreach_mul_1[18]
    getitem_177: "f32[204][1]cuda:0" = _foreach_mul_1[19]
    getitem_178: "f32[204, 160][160, 1]cuda:0" = _foreach_mul_1[20]
    getitem_179: "f32[204][1]cuda:0" = _foreach_mul_1[21]
    getitem_180: "f32[64][1]cuda:0" = _foreach_mul_1[22]
    getitem_181: "f32[204][1]cuda:0" = _foreach_mul_1[23]
    getitem_182: "f32[64, 204][204, 1]cuda:0" = _foreach_mul_1[24]
    getitem_183: "f32[204][1]cuda:0" = _foreach_mul_1[25]
    getitem_184: "f32[204][1]cuda:0" = _foreach_mul_1[26]
    getitem_185: "f32[64][1]cuda:0" = _foreach_mul_1[27]
    getitem_186: "f32[204][1]cuda:0" = _foreach_mul_1[28]
    getitem_187: "f32[64, 204][204, 1]cuda:0" = _foreach_mul_1[29]
    getitem_188: "f32[204][1]cuda:0" = _foreach_mul_1[30]
    getitem_189: "f32[204, 72][72, 1]cuda:0" = _foreach_mul_1[31]
    getitem_190: "f32[204][1]cuda:0" = _foreach_mul_1[32]
    getitem_191: "f32[64][1]cuda:0" = _foreach_mul_1[33]
    getitem_192: "f32[64, 204][204, 1]cuda:0" = _foreach_mul_1[34]
    getitem_193: "f32[768, 2675][2675, 1]cuda:0" = _foreach_mul_1[35]
    getitem_194: "f32[768, 2048][2048, 1]cuda:0" = _foreach_mul_1[36]
    getitem_195: "f32[768][1]cuda:0" = _foreach_mul_1[37]
    getitem_196: "f32[4096][1]cuda:0" = _foreach_mul_1[38]
    getitem_197: "f32[4096, 256][256, 1]cuda:0" = _foreach_mul_1[39]
    getitem_198: "f32[64][1]cuda:0" = _foreach_mul_1[40]
    getitem_199: "f32[2675][1]cuda:0" = _foreach_mul_1[41]
    getitem_200: "f32[1536, 4096][4096, 1]cuda:0" = _foreach_mul_1[42]
    getitem_201: "f32[4096][1]cuda:0" = _foreach_mul_1[43]
    getitem_202: "f32[1840][1]cuda:0" = _foreach_mul_1[44]
    getitem_203: "f32[2048, 2675][2675, 1]cuda:0" = _foreach_mul_1[45]
    getitem_204: "f32[2048][1]cuda:0" = _foreach_mul_1[46]
    getitem_205: "f32[2048][1]cuda:0" = _foreach_mul_1[47]
    getitem_206: "f32[768][1]cuda:0" = _foreach_mul_1[48]
    getitem_207: "f32[256][1]cuda:0" = _foreach_mul_1[49]
    getitem_208: "f32[768, 2048][2048, 1]cuda:0" = _foreach_mul_1[50]
    getitem_209: "f32[4096][1]cuda:0" = _foreach_mul_1[51]
    getitem_210: "f32[104][1]cuda:0" = _foreach_mul_1[52]
    getitem_211: "f32[768][1]cuda:0" = _foreach_mul_1[53]
    getitem_212: "f32[1024][1]cuda:0" = _foreach_mul_1[54]
    getitem_213: "f32[2048][1]cuda:0" = _foreach_mul_1[55]
    getitem_214: "f32[768, 2675][2675, 1]cuda:0" = _foreach_mul_1[56]
    getitem_215: "f32[2675][1]cuda:0" = _foreach_mul_1[57]
    getitem_216: "f32[256][1]cuda:0" = _foreach_mul_1[58]
    getitem_217: "f32[768][1]cuda:0" = _foreach_mul_1[59]
    getitem_218: "f32[256, 768][768, 1]cuda:0" = _foreach_mul_1[60]
    getitem_219: "f32[64][1]cuda:0" = _foreach_mul_1[61]
    getitem_220: "f32[1536][1]cuda:0" = _foreach_mul_1[62]
    getitem_221: "f32[2048][1]cuda:0" = _foreach_mul_1[63]
    getitem_222: "f32[3360][1]cuda:0" = _foreach_mul_1[64]
    getitem_223: "f32[768][1]cuda:0" = _foreach_mul_1[65]
    getitem_224: "f32[768, 2048][2048, 1]cuda:0" = _foreach_mul_1[66]
    getitem_225: "f32[256][1]cuda:0" = _foreach_mul_1[67]
    getitem_226: "f32[104, 256][256, 1]cuda:0" = _foreach_mul_1[68]
    getitem_227: "f32[2675][1]cuda:0" = _foreach_mul_1[69]
    getitem_228: "f32[768][1]cuda:0" = _foreach_mul_1[70]
    getitem_229: "f32[2048][1]cuda:0" = _foreach_mul_1[71]
    getitem_230: "f32[1024][1]cuda:0" = _foreach_mul_1[72]
    getitem_231: "f32[64, 612][612, 1]cuda:0" = _foreach_mul_1[73]
    getitem_232: "f32[128][1]cuda:0" = _foreach_mul_1[74]
    getitem_233: "f32[308, 256][256, 1]cuda:0" = _foreach_mul_1[75]
    getitem_234: "f32[1][1]cuda:0" = _foreach_mul_1[76]
    getitem_235: "f32[512][1]cuda:0" = _foreach_mul_1[77]
    getitem_236: "f32[512][1]cuda:0" = _foreach_mul_1[78]
    _foreach_mul_1 = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:236 in _compute_clippy_shrinkage, code: torch._foreach_add_(masked_blocked_denom, self._gamma2)
    _foreach_add = torch.ops.aten._foreach_add.Scalar(
        [
            getitem_158,
            getitem_159,
            getitem_160,
            getitem_161,
            getitem_162,
            getitem_163,
            getitem_164,
            getitem_165,
            getitem_166,
            getitem_167,
            getitem_168,
            getitem_169,
            getitem_170,
            getitem_171,
            getitem_172,
            getitem_173,
            getitem_174,
            getitem_175,
            getitem_176,
            getitem_177,
            getitem_178,
            getitem_179,
            getitem_180,
            getitem_181,
            getitem_182,
            getitem_183,
            getitem_184,
            getitem_185,
            getitem_186,
            getitem_187,
            getitem_188,
            getitem_189,
            getitem_190,
            getitem_191,
            getitem_192,
            getitem_193,
            getitem_194,
            getitem_195,
            getitem_196,
            getitem_197,
            getitem_198,
            getitem_199,
            getitem_200,
            getitem_201,
            getitem_202,
            getitem_203,
            getitem_204,
            getitem_205,
            getitem_206,
            getitem_207,
            getitem_208,
            getitem_209,
            getitem_210,
            getitem_211,
            getitem_212,
            getitem_213,
            getitem_214,
            getitem_215,
            getitem_216,
            getitem_217,
            getitem_218,
            getitem_219,
            getitem_220,
            getitem_221,
            getitem_222,
            getitem_223,
            getitem_224,
            getitem_225,
            getitem_226,
            getitem_227,
            getitem_228,
            getitem_229,
            getitem_230,
            getitem_231,
            getitem_232,
            getitem_233,
            getitem_234,
            getitem_235,
            getitem_236,
        ],
        0.01,
    )
    getitem_158 = (
        getitem_159
    ) = (
        getitem_160
    ) = (
        getitem_161
    ) = (
        getitem_162
    ) = (
        getitem_163
    ) = (
        getitem_164
    ) = (
        getitem_165
    ) = (
        getitem_166
    ) = (
        getitem_167
    ) = (
        getitem_168
    ) = (
        getitem_169
    ) = (
        getitem_170
    ) = (
        getitem_171
    ) = (
        getitem_172
    ) = (
        getitem_173
    ) = (
        getitem_174
    ) = (
        getitem_175
    ) = (
        getitem_176
    ) = (
        getitem_177
    ) = (
        getitem_178
    ) = (
        getitem_179
    ) = (
        getitem_180
    ) = (
        getitem_181
    ) = (
        getitem_182
    ) = (
        getitem_183
    ) = (
        getitem_184
    ) = (
        getitem_185
    ) = (
        getitem_186
    ) = (
        getitem_187
    ) = (
        getitem_188
    ) = (
        getitem_189
    ) = (
        getitem_190
    ) = (
        getitem_191
    ) = (
        getitem_192
    ) = (
        getitem_193
    ) = (
        getitem_194
    ) = (
        getitem_195
    ) = (
        getitem_196
    ) = (
        getitem_197
    ) = (
        getitem_198
    ) = (
        getitem_199
    ) = (
        getitem_200
    ) = (
        getitem_201
    ) = (
        getitem_202
    ) = (
        getitem_203
    ) = (
        getitem_204
    ) = (
        getitem_205
    ) = (
        getitem_206
    ) = (
        getitem_207
    ) = (
        getitem_208
    ) = (
        getitem_209
    ) = (
        getitem_210
    ) = (
        getitem_211
    ) = (
        getitem_212
    ) = (
        getitem_213
    ) = (
        getitem_214
    ) = (
        getitem_215
    ) = (
        getitem_216
    ) = (
        getitem_217
    ) = (
        getitem_218
    ) = (
        getitem_219
    ) = (
        getitem_220
    ) = (
        getitem_221
    ) = (
        getitem_222
    ) = (
        getitem_223
    ) = (
        getitem_224
    ) = (
        getitem_225
    ) = (
        getitem_226
    ) = (
        getitem_227
    ) = (
        getitem_228
    ) = (
        getitem_229
    ) = (
        getitem_230
    ) = (
        getitem_231
    ) = getitem_232 = getitem_233 = getitem_234 = getitem_235 = getitem_236 = None
    getitem_237: "f32[50][1]cuda:0" = _foreach_add[0]
    getitem_238: "f32[23][1]cuda:0" = _foreach_add[1]
    getitem_239: "f32[38][1]cuda:0" = _foreach_add[2]
    getitem_240: "f32[5][1]cuda:0" = _foreach_add[3]
    getitem_241: "f32[100][1]cuda:0" = _foreach_add[4]
    getitem_242: "f32[50][1]cuda:0" = _foreach_add[5]
    getitem_243: "f32[77][1]cuda:0" = _foreach_add[6]
    getitem_244: "f32[100][1]cuda:0" = _foreach_add[7]
    getitem_245: "f32[100][1]cuda:0" = _foreach_add[8]
    getitem_246: "f32[96][1]cuda:0" = _foreach_add[9]
    getitem_247: "f32[78][1]cuda:0" = _foreach_add[10]
    getitem_248: "f32[100][1]cuda:0" = _foreach_add[11]
    getitem_249: "f32[100][1]cuda:0" = _foreach_add[12]
    getitem_250: "f32[97][1]cuda:0" = _foreach_add[13]
    getitem_251: "f32[819, 732][732, 1]cuda:0" = _foreach_add[14]
    getitem_252: "f32[204][1]cuda:0" = _foreach_add[15]
    getitem_253: "f32[64][1]cuda:0" = _foreach_add[16]
    getitem_254: "f32[204][1]cuda:0" = _foreach_add[17]
    getitem_255: "f32[64, 204][204, 1]cuda:0" = _foreach_add[18]
    getitem_256: "f32[204][1]cuda:0" = _foreach_add[19]
    getitem_257: "f32[204, 160][160, 1]cuda:0" = _foreach_add[20]
    getitem_258: "f32[204][1]cuda:0" = _foreach_add[21]
    getitem_259: "f32[64][1]cuda:0" = _foreach_add[22]
    getitem_260: "f32[204][1]cuda:0" = _foreach_add[23]
    getitem_261: "f32[64, 204][204, 1]cuda:0" = _foreach_add[24]
    getitem_262: "f32[204][1]cuda:0" = _foreach_add[25]
    getitem_263: "f32[204][1]cuda:0" = _foreach_add[26]
    getitem_264: "f32[64][1]cuda:0" = _foreach_add[27]
    getitem_265: "f32[204][1]cuda:0" = _foreach_add[28]
    getitem_266: "f32[64, 204][204, 1]cuda:0" = _foreach_add[29]
    getitem_267: "f32[204][1]cuda:0" = _foreach_add[30]
    getitem_268: "f32[204, 72][72, 1]cuda:0" = _foreach_add[31]
    getitem_269: "f32[204][1]cuda:0" = _foreach_add[32]
    getitem_270: "f32[64][1]cuda:0" = _foreach_add[33]
    getitem_271: "f32[64, 204][204, 1]cuda:0" = _foreach_add[34]
    getitem_272: "f32[768, 2675][2675, 1]cuda:0" = _foreach_add[35]
    getitem_273: "f32[768, 2048][2048, 1]cuda:0" = _foreach_add[36]
    getitem_274: "f32[768][1]cuda:0" = _foreach_add[37]
    getitem_275: "f32[4096][1]cuda:0" = _foreach_add[38]
    getitem_276: "f32[4096, 256][256, 1]cuda:0" = _foreach_add[39]
    getitem_277: "f32[64][1]cuda:0" = _foreach_add[40]
    getitem_278: "f32[2675][1]cuda:0" = _foreach_add[41]
    getitem_279: "f32[1536, 4096][4096, 1]cuda:0" = _foreach_add[42]
    getitem_280: "f32[4096][1]cuda:0" = _foreach_add[43]
    getitem_281: "f32[1840][1]cuda:0" = _foreach_add[44]
    getitem_282: "f32[2048, 2675][2675, 1]cuda:0" = _foreach_add[45]
    getitem_283: "f32[2048][1]cuda:0" = _foreach_add[46]
    getitem_284: "f32[2048][1]cuda:0" = _foreach_add[47]
    getitem_285: "f32[768][1]cuda:0" = _foreach_add[48]
    getitem_286: "f32[256][1]cuda:0" = _foreach_add[49]
    getitem_287: "f32[768, 2048][2048, 1]cuda:0" = _foreach_add[50]
    getitem_288: "f32[4096][1]cuda:0" = _foreach_add[51]
    getitem_289: "f32[104][1]cuda:0" = _foreach_add[52]
    getitem_290: "f32[768][1]cuda:0" = _foreach_add[53]
    getitem_291: "f32[1024][1]cuda:0" = _foreach_add[54]
    getitem_292: "f32[2048][1]cuda:0" = _foreach_add[55]
    getitem_293: "f32[768, 2675][2675, 1]cuda:0" = _foreach_add[56]
    getitem_294: "f32[2675][1]cuda:0" = _foreach_add[57]
    getitem_295: "f32[256][1]cuda:0" = _foreach_add[58]
    getitem_296: "f32[768][1]cuda:0" = _foreach_add[59]
    getitem_297: "f32[256, 768][768, 1]cuda:0" = _foreach_add[60]
    getitem_298: "f32[64][1]cuda:0" = _foreach_add[61]
    getitem_299: "f32[1536][1]cuda:0" = _foreach_add[62]
    getitem_300: "f32[2048][1]cuda:0" = _foreach_add[63]
    getitem_301: "f32[3360][1]cuda:0" = _foreach_add[64]
    getitem_302: "f32[768][1]cuda:0" = _foreach_add[65]
    getitem_303: "f32[768, 2048][2048, 1]cuda:0" = _foreach_add[66]
    getitem_304: "f32[256][1]cuda:0" = _foreach_add[67]
    getitem_305: "f32[104, 256][256, 1]cuda:0" = _foreach_add[68]
    getitem_306: "f32[2675][1]cuda:0" = _foreach_add[69]
    getitem_307: "f32[768][1]cuda:0" = _foreach_add[70]
    getitem_308: "f32[2048][1]cuda:0" = _foreach_add[71]
    getitem_309: "f32[1024][1]cuda:0" = _foreach_add[72]
    getitem_310: "f32[64, 612][612, 1]cuda:0" = _foreach_add[73]
    getitem_311: "f32[128][1]cuda:0" = _foreach_add[74]
    getitem_312: "f32[308, 256][256, 1]cuda:0" = _foreach_add[75]
    getitem_313: "f32[1][1]cuda:0" = _foreach_add[76]
    getitem_314: "f32[512][1]cuda:0" = _foreach_add[77]
    getitem_315: "f32[512][1]cuda:0" = _foreach_add[78]
    _foreach_add = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:237 in _compute_clippy_shrinkage, code: torch._foreach_div_(masked_blocked_nom, masked_blocked_denom)
    _foreach_div = torch.ops.aten._foreach_div.List(
        [
            getitem,
            getitem_1,
            getitem_2,
            getitem_3,
            getitem_4,
            getitem_5,
            getitem_6,
            getitem_7,
            getitem_8,
            getitem_9,
            getitem_10,
            getitem_11,
            getitem_12,
            getitem_13,
            getitem_14,
            getitem_15,
            getitem_16,
            getitem_17,
            getitem_18,
            getitem_19,
            getitem_20,
            getitem_21,
            getitem_22,
            getitem_23,
            getitem_24,
            getitem_25,
            getitem_26,
            getitem_27,
            getitem_28,
            getitem_29,
            getitem_30,
            getitem_31,
            getitem_32,
            getitem_33,
            getitem_34,
            getitem_35,
            getitem_36,
            getitem_37,
            getitem_38,
            getitem_39,
            getitem_40,
            getitem_41,
            getitem_42,
            getitem_43,
            getitem_44,
            getitem_45,
            getitem_46,
            getitem_47,
            getitem_48,
            getitem_49,
            getitem_50,
            getitem_51,
            getitem_52,
            getitem_53,
            getitem_54,
            getitem_55,
            getitem_56,
            getitem_57,
            getitem_58,
            getitem_59,
            getitem_60,
            getitem_61,
            getitem_62,
            getitem_63,
            getitem_64,
            getitem_65,
            getitem_66,
            getitem_67,
            getitem_68,
            getitem_69,
            getitem_70,
            getitem_71,
            getitem_72,
            getitem_73,
            getitem_74,
            getitem_75,
            getitem_76,
            getitem_77,
            getitem_78,
        ],
        [
            getitem_237,
            getitem_238,
            getitem_239,
            getitem_240,
            getitem_241,
            getitem_242,
            getitem_243,
            getitem_244,
            getitem_245,
            getitem_246,
            getitem_247,
            getitem_248,
            getitem_249,
            getitem_250,
            getitem_251,
            getitem_252,
            getitem_253,
            getitem_254,
            getitem_255,
            getitem_256,
            getitem_257,
            getitem_258,
            getitem_259,
            getitem_260,
            getitem_261,
            getitem_262,
            getitem_263,
            getitem_264,
            getitem_265,
            getitem_266,
            getitem_267,
            getitem_268,
            getitem_269,
            getitem_270,
            getitem_271,
            getitem_272,
            getitem_273,
            getitem_274,
            getitem_275,
            getitem_276,
            getitem_277,
            getitem_278,
            getitem_279,
            getitem_280,
            getitem_281,
            getitem_282,
            getitem_283,
            getitem_284,
            getitem_285,
            getitem_286,
            getitem_287,
            getitem_288,
            getitem_289,
            getitem_290,
            getitem_291,
            getitem_292,
            getitem_293,
            getitem_294,
            getitem_295,
            getitem_296,
            getitem_297,
            getitem_298,
            getitem_299,
            getitem_300,
            getitem_301,
            getitem_302,
            getitem_303,
            getitem_304,
            getitem_305,
            getitem_306,
            getitem_307,
            getitem_308,
            getitem_309,
            getitem_310,
            getitem_311,
            getitem_312,
            getitem_313,
            getitem_314,
            getitem_315,
        ],
    )
    getitem = (
        getitem_1
    ) = (
        getitem_2
    ) = (
        getitem_3
    ) = (
        getitem_4
    ) = (
        getitem_5
    ) = (
        getitem_6
    ) = (
        getitem_7
    ) = (
        getitem_8
    ) = (
        getitem_9
    ) = (
        getitem_10
    ) = (
        getitem_11
    ) = (
        getitem_12
    ) = (
        getitem_13
    ) = (
        getitem_14
    ) = (
        getitem_15
    ) = (
        getitem_16
    ) = (
        getitem_17
    ) = (
        getitem_18
    ) = (
        getitem_19
    ) = (
        getitem_20
    ) = (
        getitem_21
    ) = (
        getitem_22
    ) = (
        getitem_23
    ) = (
        getitem_24
    ) = (
        getitem_25
    ) = (
        getitem_26
    ) = (
        getitem_27
    ) = (
        getitem_28
    ) = (
        getitem_29
    ) = (
        getitem_30
    ) = (
        getitem_31
    ) = (
        getitem_32
    ) = (
        getitem_33
    ) = (
        getitem_34
    ) = (
        getitem_35
    ) = (
        getitem_36
    ) = (
        getitem_37
    ) = (
        getitem_38
    ) = (
        getitem_39
    ) = (
        getitem_40
    ) = (
        getitem_41
    ) = (
        getitem_42
    ) = (
        getitem_43
    ) = (
        getitem_44
    ) = (
        getitem_45
    ) = (
        getitem_46
    ) = (
        getitem_47
    ) = (
        getitem_48
    ) = (
        getitem_49
    ) = (
        getitem_50
    ) = (
        getitem_51
    ) = (
        getitem_52
    ) = (
        getitem_53
    ) = (
        getitem_54
    ) = (
        getitem_55
    ) = (
        getitem_56
    ) = (
        getitem_57
    ) = (
        getitem_58
    ) = (
        getitem_59
    ) = (
        getitem_60
    ) = (
        getitem_61
    ) = (
        getitem_62
    ) = (
        getitem_63
    ) = (
        getitem_64
    ) = (
        getitem_65
    ) = (
        getitem_66
    ) = (
        getitem_67
    ) = (
        getitem_68
    ) = (
        getitem_69
    ) = (
        getitem_70
    ) = (
        getitem_71
    ) = (
        getitem_72
    ) = (
        getitem_73
    ) = (
        getitem_74
    ) = (
        getitem_75
    ) = (
        getitem_76
    ) = (
        getitem_77
    ) = (
        getitem_78
    ) = (
        getitem_237
    ) = (
        getitem_238
    ) = (
        getitem_239
    ) = (
        getitem_240
    ) = (
        getitem_241
    ) = (
        getitem_242
    ) = (
        getitem_243
    ) = (
        getitem_244
    ) = (
        getitem_245
    ) = (
        getitem_246
    ) = (
        getitem_247
    ) = (
        getitem_248
    ) = (
        getitem_249
    ) = (
        getitem_250
    ) = (
        getitem_251
    ) = (
        getitem_252
    ) = (
        getitem_253
    ) = (
        getitem_254
    ) = (
        getitem_255
    ) = (
        getitem_256
    ) = (
        getitem_257
    ) = (
        getitem_258
    ) = (
        getitem_259
    ) = (
        getitem_260
    ) = (
        getitem_261
    ) = (
        getitem_262
    ) = (
        getitem_263
    ) = (
        getitem_264
    ) = (
        getitem_265
    ) = (
        getitem_266
    ) = (
        getitem_267
    ) = (
        getitem_268
    ) = (
        getitem_269
    ) = (
        getitem_270
    ) = (
        getitem_271
    ) = (
        getitem_272
    ) = (
        getitem_273
    ) = (
        getitem_274
    ) = (
        getitem_275
    ) = (
        getitem_276
    ) = (
        getitem_277
    ) = (
        getitem_278
    ) = (
        getitem_279
    ) = (
        getitem_280
    ) = (
        getitem_281
    ) = (
        getitem_282
    ) = (
        getitem_283
    ) = (
        getitem_284
    ) = (
        getitem_285
    ) = (
        getitem_286
    ) = (
        getitem_287
    ) = (
        getitem_288
    ) = (
        getitem_289
    ) = (
        getitem_290
    ) = (
        getitem_291
    ) = (
        getitem_292
    ) = (
        getitem_293
    ) = (
        getitem_294
    ) = (
        getitem_295
    ) = (
        getitem_296
    ) = (
        getitem_297
    ) = (
        getitem_298
    ) = (
        getitem_299
    ) = (
        getitem_300
    ) = (
        getitem_301
    ) = (
        getitem_302
    ) = (
        getitem_303
    ) = (
        getitem_304
    ) = (
        getitem_305
    ) = (
        getitem_306
    ) = (
        getitem_307
    ) = (
        getitem_308
    ) = (
        getitem_309
    ) = (
        getitem_310
    ) = getitem_311 = getitem_312 = getitem_313 = getitem_314 = getitem_315 = None
    getitem_316: "f32[50][1]cuda:0" = _foreach_div[0]
    getitem_317: "f32[23][1]cuda:0" = _foreach_div[1]
    getitem_318: "f32[38][1]cuda:0" = _foreach_div[2]
    getitem_319: "f32[5][1]cuda:0" = _foreach_div[3]
    getitem_320: "f32[100][1]cuda:0" = _foreach_div[4]
    getitem_321: "f32[50][1]cuda:0" = _foreach_div[5]
    getitem_322: "f32[77][1]cuda:0" = _foreach_div[6]
    getitem_323: "f32[100][1]cuda:0" = _foreach_div[7]
    getitem_324: "f32[100][1]cuda:0" = _foreach_div[8]
    getitem_325: "f32[96][1]cuda:0" = _foreach_div[9]
    getitem_326: "f32[78][1]cuda:0" = _foreach_div[10]
    getitem_327: "f32[100][1]cuda:0" = _foreach_div[11]
    getitem_328: "f32[100][1]cuda:0" = _foreach_div[12]
    getitem_329: "f32[97][1]cuda:0" = _foreach_div[13]
    getitem_330: "f32[819, 732][732, 1]cuda:0" = _foreach_div[14]
    getitem_331: "f32[204][1]cuda:0" = _foreach_div[15]
    getitem_332: "f32[64][1]cuda:0" = _foreach_div[16]
    getitem_333: "f32[204][1]cuda:0" = _foreach_div[17]
    getitem_334: "f32[64, 204][204, 1]cuda:0" = _foreach_div[18]
    getitem_335: "f32[204][1]cuda:0" = _foreach_div[19]
    getitem_336: "f32[204, 160][160, 1]cuda:0" = _foreach_div[20]
    getitem_337: "f32[204][1]cuda:0" = _foreach_div[21]
    getitem_338: "f32[64][1]cuda:0" = _foreach_div[22]
    getitem_339: "f32[204][1]cuda:0" = _foreach_div[23]
    getitem_340: "f32[64, 204][204, 1]cuda:0" = _foreach_div[24]
    getitem_341: "f32[204][1]cuda:0" = _foreach_div[25]
    getitem_342: "f32[204][1]cuda:0" = _foreach_div[26]
    getitem_343: "f32[64][1]cuda:0" = _foreach_div[27]
    getitem_344: "f32[204][1]cuda:0" = _foreach_div[28]
    getitem_345: "f32[64, 204][204, 1]cuda:0" = _foreach_div[29]
    getitem_346: "f32[204][1]cuda:0" = _foreach_div[30]
    getitem_347: "f32[204, 72][72, 1]cuda:0" = _foreach_div[31]
    getitem_348: "f32[204][1]cuda:0" = _foreach_div[32]
    getitem_349: "f32[64][1]cuda:0" = _foreach_div[33]
    getitem_350: "f32[64, 204][204, 1]cuda:0" = _foreach_div[34]
    getitem_351: "f32[768, 2675][2675, 1]cuda:0" = _foreach_div[35]
    getitem_352: "f32[768, 2048][2048, 1]cuda:0" = _foreach_div[36]
    getitem_353: "f32[768][1]cuda:0" = _foreach_div[37]
    getitem_354: "f32[4096][1]cuda:0" = _foreach_div[38]
    getitem_355: "f32[4096, 256][256, 1]cuda:0" = _foreach_div[39]
    getitem_356: "f32[64][1]cuda:0" = _foreach_div[40]
    getitem_357: "f32[2675][1]cuda:0" = _foreach_div[41]
    getitem_358: "f32[1536, 4096][4096, 1]cuda:0" = _foreach_div[42]
    getitem_359: "f32[4096][1]cuda:0" = _foreach_div[43]
    getitem_360: "f32[1840][1]cuda:0" = _foreach_div[44]
    getitem_361: "f32[2048, 2675][2675, 1]cuda:0" = _foreach_div[45]
    getitem_362: "f32[2048][1]cuda:0" = _foreach_div[46]
    getitem_363: "f32[2048][1]cuda:0" = _foreach_div[47]
    getitem_364: "f32[768][1]cuda:0" = _foreach_div[48]
    getitem_365: "f32[256][1]cuda:0" = _foreach_div[49]
    getitem_366: "f32[768, 2048][2048, 1]cuda:0" = _foreach_div[50]
    getitem_367: "f32[4096][1]cuda:0" = _foreach_div[51]
    getitem_368: "f32[104][1]cuda:0" = _foreach_div[52]
    getitem_369: "f32[768][1]cuda:0" = _foreach_div[53]
    getitem_370: "f32[1024][1]cuda:0" = _foreach_div[54]
    getitem_371: "f32[2048][1]cuda:0" = _foreach_div[55]
    getitem_372: "f32[768, 2675][2675, 1]cuda:0" = _foreach_div[56]
    getitem_373: "f32[2675][1]cuda:0" = _foreach_div[57]
    getitem_374: "f32[256][1]cuda:0" = _foreach_div[58]
    getitem_375: "f32[768][1]cuda:0" = _foreach_div[59]
    getitem_376: "f32[256, 768][768, 1]cuda:0" = _foreach_div[60]
    getitem_377: "f32[64][1]cuda:0" = _foreach_div[61]
    getitem_378: "f32[1536][1]cuda:0" = _foreach_div[62]
    getitem_379: "f32[2048][1]cuda:0" = _foreach_div[63]
    getitem_380: "f32[3360][1]cuda:0" = _foreach_div[64]
    getitem_381: "f32[768][1]cuda:0" = _foreach_div[65]
    getitem_382: "f32[768, 2048][2048, 1]cuda:0" = _foreach_div[66]
    getitem_383: "f32[256][1]cuda:0" = _foreach_div[67]
    getitem_384: "f32[104, 256][256, 1]cuda:0" = _foreach_div[68]
    getitem_385: "f32[2675][1]cuda:0" = _foreach_div[69]
    getitem_386: "f32[768][1]cuda:0" = _foreach_div[70]
    getitem_387: "f32[2048][1]cuda:0" = _foreach_div[71]
    getitem_388: "f32[1024][1]cuda:0" = _foreach_div[72]
    getitem_389: "f32[64, 612][612, 1]cuda:0" = _foreach_div[73]
    getitem_390: "f32[128][1]cuda:0" = _foreach_div[74]
    getitem_391: "f32[308, 256][256, 1]cuda:0" = _foreach_div[75]
    getitem_392: "f32[1][1]cuda:0" = _foreach_div[76]
    getitem_393: "f32[512][1]cuda:0" = _foreach_div[77]
    getitem_394: "f32[512][1]cuda:0" = _foreach_div[78]
    _foreach_div = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:238 in _compute_clippy_shrinkage, code: masked_blocked_shrinkage = torch._foreach_norm(masked_blocked_nom, float("inf"))
    _foreach_norm = torch.ops.aten._foreach_norm.Scalar(
        [
            getitem_316,
            getitem_317,
            getitem_318,
            getitem_319,
            getitem_320,
            getitem_321,
            getitem_322,
            getitem_323,
            getitem_324,
            getitem_325,
            getitem_326,
            getitem_327,
            getitem_328,
            getitem_329,
            getitem_330,
            getitem_331,
            getitem_332,
            getitem_333,
            getitem_334,
            getitem_335,
            getitem_336,
            getitem_337,
            getitem_338,
            getitem_339,
            getitem_340,
            getitem_341,
            getitem_342,
            getitem_343,
            getitem_344,
            getitem_345,
            getitem_346,
            getitem_347,
            getitem_348,
            getitem_349,
            getitem_350,
            getitem_351,
            getitem_352,
            getitem_353,
            getitem_354,
            getitem_355,
            getitem_356,
            getitem_357,
            getitem_358,
            getitem_359,
            getitem_360,
            getitem_361,
            getitem_362,
            getitem_363,
            getitem_364,
            getitem_365,
            getitem_366,
            getitem_367,
            getitem_368,
            getitem_369,
            getitem_370,
            getitem_371,
            getitem_372,
            getitem_373,
            getitem_374,
            getitem_375,
            getitem_376,
            getitem_377,
            getitem_378,
            getitem_379,
            getitem_380,
            getitem_381,
            getitem_382,
            getitem_383,
            getitem_384,
            getitem_385,
            getitem_386,
            getitem_387,
            getitem_388,
            getitem_389,
            getitem_390,
            getitem_391,
            getitem_392,
            getitem_393,
            getitem_394,
        ],
        inf,
    )
    getitem_316 = (
        getitem_317
    ) = (
        getitem_318
    ) = (
        getitem_319
    ) = (
        getitem_320
    ) = (
        getitem_321
    ) = (
        getitem_322
    ) = (
        getitem_323
    ) = (
        getitem_324
    ) = (
        getitem_325
    ) = (
        getitem_326
    ) = (
        getitem_327
    ) = (
        getitem_328
    ) = (
        getitem_329
    ) = (
        getitem_330
    ) = (
        getitem_331
    ) = (
        getitem_332
    ) = (
        getitem_333
    ) = (
        getitem_334
    ) = (
        getitem_335
    ) = (
        getitem_336
    ) = (
        getitem_337
    ) = (
        getitem_338
    ) = (
        getitem_339
    ) = (
        getitem_340
    ) = (
        getitem_341
    ) = (
        getitem_342
    ) = (
        getitem_343
    ) = (
        getitem_344
    ) = (
        getitem_345
    ) = (
        getitem_346
    ) = (
        getitem_347
    ) = (
        getitem_348
    ) = (
        getitem_349
    ) = (
        getitem_350
    ) = (
        getitem_351
    ) = (
        getitem_352
    ) = (
        getitem_353
    ) = (
        getitem_354
    ) = (
        getitem_355
    ) = (
        getitem_356
    ) = (
        getitem_357
    ) = (
        getitem_358
    ) = (
        getitem_359
    ) = (
        getitem_360
    ) = (
        getitem_361
    ) = (
        getitem_362
    ) = (
        getitem_363
    ) = (
        getitem_364
    ) = (
        getitem_365
    ) = (
        getitem_366
    ) = (
        getitem_367
    ) = (
        getitem_368
    ) = (
        getitem_369
    ) = (
        getitem_370
    ) = (
        getitem_371
    ) = (
        getitem_372
    ) = (
        getitem_373
    ) = (
        getitem_374
    ) = (
        getitem_375
    ) = (
        getitem_376
    ) = (
        getitem_377
    ) = (
        getitem_378
    ) = (
        getitem_379
    ) = (
        getitem_380
    ) = (
        getitem_381
    ) = (
        getitem_382
    ) = (
        getitem_383
    ) = (
        getitem_384
    ) = (
        getitem_385
    ) = (
        getitem_386
    ) = (
        getitem_387
    ) = (
        getitem_388
    ) = (
        getitem_389
    ) = getitem_390 = getitem_391 = getitem_392 = getitem_393 = getitem_394 = None
    getitem_395: "f32[][]cuda:0" = _foreach_norm[0]
    getitem_396: "f32[][]cuda:0" = _foreach_norm[1]
    getitem_397: "f32[][]cuda:0" = _foreach_norm[2]
    getitem_398: "f32[][]cuda:0" = _foreach_norm[3]
    getitem_399: "f32[][]cuda:0" = _foreach_norm[4]
    getitem_400: "f32[][]cuda:0" = _foreach_norm[5]
    getitem_401: "f32[][]cuda:0" = _foreach_norm[6]
    getitem_402: "f32[][]cuda:0" = _foreach_norm[7]
    getitem_403: "f32[][]cuda:0" = _foreach_norm[8]
    getitem_404: "f32[][]cuda:0" = _foreach_norm[9]
    getitem_405: "f32[][]cuda:0" = _foreach_norm[10]
    getitem_406: "f32[][]cuda:0" = _foreach_norm[11]
    getitem_407: "f32[][]cuda:0" = _foreach_norm[12]
    getitem_408: "f32[][]cuda:0" = _foreach_norm[13]
    getitem_409: "f32[][]cuda:0" = _foreach_norm[14]
    getitem_410: "f32[][]cuda:0" = _foreach_norm[15]
    getitem_411: "f32[][]cuda:0" = _foreach_norm[16]
    getitem_412: "f32[][]cuda:0" = _foreach_norm[17]
    getitem_413: "f32[][]cuda:0" = _foreach_norm[18]
    getitem_414: "f32[][]cuda:0" = _foreach_norm[19]
    getitem_415: "f32[][]cuda:0" = _foreach_norm[20]
    getitem_416: "f32[][]cuda:0" = _foreach_norm[21]
    getitem_417: "f32[][]cuda:0" = _foreach_norm[22]
    getitem_418: "f32[][]cuda:0" = _foreach_norm[23]
    getitem_419: "f32[][]cuda:0" = _foreach_norm[24]
    getitem_420: "f32[][]cuda:0" = _foreach_norm[25]
    getitem_421: "f32[][]cuda:0" = _foreach_norm[26]
    getitem_422: "f32[][]cuda:0" = _foreach_norm[27]
    getitem_423: "f32[][]cuda:0" = _foreach_norm[28]
    getitem_424: "f32[][]cuda:0" = _foreach_norm[29]
    getitem_425: "f32[][]cuda:0" = _foreach_norm[30]
    getitem_426: "f32[][]cuda:0" = _foreach_norm[31]
    getitem_427: "f32[][]cuda:0" = _foreach_norm[32]
    getitem_428: "f32[][]cuda:0" = _foreach_norm[33]
    getitem_429: "f32[][]cuda:0" = _foreach_norm[34]
    getitem_430: "f32[][]cuda:0" = _foreach_norm[35]
    getitem_431: "f32[][]cuda:0" = _foreach_norm[36]
    getitem_432: "f32[][]cuda:0" = _foreach_norm[37]
    getitem_433: "f32[][]cuda:0" = _foreach_norm[38]
    getitem_434: "f32[][]cuda:0" = _foreach_norm[39]
    getitem_435: "f32[][]cuda:0" = _foreach_norm[40]
    getitem_436: "f32[][]cuda:0" = _foreach_norm[41]
    getitem_437: "f32[][]cuda:0" = _foreach_norm[42]
    getitem_438: "f32[][]cuda:0" = _foreach_norm[43]
    getitem_439: "f32[][]cuda:0" = _foreach_norm[44]
    getitem_440: "f32[][]cuda:0" = _foreach_norm[45]
    getitem_441: "f32[][]cuda:0" = _foreach_norm[46]
    getitem_442: "f32[][]cuda:0" = _foreach_norm[47]
    getitem_443: "f32[][]cuda:0" = _foreach_norm[48]
    getitem_444: "f32[][]cuda:0" = _foreach_norm[49]
    getitem_445: "f32[][]cuda:0" = _foreach_norm[50]
    getitem_446: "f32[][]cuda:0" = _foreach_norm[51]
    getitem_447: "f32[][]cuda:0" = _foreach_norm[52]
    getitem_448: "f32[][]cuda:0" = _foreach_norm[53]
    getitem_449: "f32[][]cuda:0" = _foreach_norm[54]
    getitem_450: "f32[][]cuda:0" = _foreach_norm[55]
    getitem_451: "f32[][]cuda:0" = _foreach_norm[56]
    getitem_452: "f32[][]cuda:0" = _foreach_norm[57]
    getitem_453: "f32[][]cuda:0" = _foreach_norm[58]
    getitem_454: "f32[][]cuda:0" = _foreach_norm[59]
    getitem_455: "f32[][]cuda:0" = _foreach_norm[60]
    getitem_456: "f32[][]cuda:0" = _foreach_norm[61]
    getitem_457: "f32[][]cuda:0" = _foreach_norm[62]
    getitem_458: "f32[][]cuda:0" = _foreach_norm[63]
    getitem_459: "f32[][]cuda:0" = _foreach_norm[64]
    getitem_460: "f32[][]cuda:0" = _foreach_norm[65]
    getitem_461: "f32[][]cuda:0" = _foreach_norm[66]
    getitem_462: "f32[][]cuda:0" = _foreach_norm[67]
    getitem_463: "f32[][]cuda:0" = _foreach_norm[68]
    getitem_464: "f32[][]cuda:0" = _foreach_norm[69]
    getitem_465: "f32[][]cuda:0" = _foreach_norm[70]
    getitem_466: "f32[][]cuda:0" = _foreach_norm[71]
    getitem_467: "f32[][]cuda:0" = _foreach_norm[72]
    getitem_468: "f32[][]cuda:0" = _foreach_norm[73]
    getitem_469: "f32[][]cuda:0" = _foreach_norm[74]
    getitem_470: "f32[][]cuda:0" = _foreach_norm[75]
    getitem_471: "f32[][]cuda:0" = _foreach_norm[76]
    getitem_472: "f32[][]cuda:0" = _foreach_norm[77]
    getitem_473: "f32[][]cuda:0" = _foreach_norm[78]
    _foreach_norm = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:239 in _compute_clippy_shrinkage, code: torch._foreach_maximum_(masked_blocked_shrinkage, 1.0)
    _foreach_maximum = torch.ops.aten._foreach_maximum.Scalar(
        [
            getitem_395,
            getitem_396,
            getitem_397,
            getitem_398,
            getitem_399,
            getitem_400,
            getitem_401,
            getitem_402,
            getitem_403,
            getitem_404,
            getitem_405,
            getitem_406,
            getitem_407,
            getitem_408,
            getitem_409,
            getitem_410,
            getitem_411,
            getitem_412,
            getitem_413,
            getitem_414,
            getitem_415,
            getitem_416,
            getitem_417,
            getitem_418,
            getitem_419,
            getitem_420,
            getitem_421,
            getitem_422,
            getitem_423,
            getitem_424,
            getitem_425,
            getitem_426,
            getitem_427,
            getitem_428,
            getitem_429,
            getitem_430,
            getitem_431,
            getitem_432,
            getitem_433,
            getitem_434,
            getitem_435,
            getitem_436,
            getitem_437,
            getitem_438,
            getitem_439,
            getitem_440,
            getitem_441,
            getitem_442,
            getitem_443,
            getitem_444,
            getitem_445,
            getitem_446,
            getitem_447,
            getitem_448,
            getitem_449,
            getitem_450,
            getitem_451,
            getitem_452,
            getitem_453,
            getitem_454,
            getitem_455,
            getitem_456,
            getitem_457,
            getitem_458,
            getitem_459,
            getitem_460,
            getitem_461,
            getitem_462,
            getitem_463,
            getitem_464,
            getitem_465,
            getitem_466,
            getitem_467,
            getitem_468,
            getitem_469,
            getitem_470,
            getitem_471,
            getitem_472,
            getitem_473,
        ],
        1.0,
    )
    getitem_395 = (
        getitem_396
    ) = (
        getitem_397
    ) = (
        getitem_398
    ) = (
        getitem_399
    ) = (
        getitem_400
    ) = (
        getitem_401
    ) = (
        getitem_402
    ) = (
        getitem_403
    ) = (
        getitem_404
    ) = (
        getitem_405
    ) = (
        getitem_406
    ) = (
        getitem_407
    ) = (
        getitem_408
    ) = (
        getitem_409
    ) = (
        getitem_410
    ) = (
        getitem_411
    ) = (
        getitem_412
    ) = (
        getitem_413
    ) = (
        getitem_414
    ) = (
        getitem_415
    ) = (
        getitem_416
    ) = (
        getitem_417
    ) = (
        getitem_418
    ) = (
        getitem_419
    ) = (
        getitem_420
    ) = (
        getitem_421
    ) = (
        getitem_422
    ) = (
        getitem_423
    ) = (
        getitem_424
    ) = (
        getitem_425
    ) = (
        getitem_426
    ) = (
        getitem_427
    ) = (
        getitem_428
    ) = (
        getitem_429
    ) = (
        getitem_430
    ) = (
        getitem_431
    ) = (
        getitem_432
    ) = (
        getitem_433
    ) = (
        getitem_434
    ) = (
        getitem_435
    ) = (
        getitem_436
    ) = (
        getitem_437
    ) = (
        getitem_438
    ) = (
        getitem_439
    ) = (
        getitem_440
    ) = (
        getitem_441
    ) = (
        getitem_442
    ) = (
        getitem_443
    ) = (
        getitem_444
    ) = (
        getitem_445
    ) = (
        getitem_446
    ) = (
        getitem_447
    ) = (
        getitem_448
    ) = (
        getitem_449
    ) = (
        getitem_450
    ) = (
        getitem_451
    ) = (
        getitem_452
    ) = (
        getitem_453
    ) = (
        getitem_454
    ) = (
        getitem_455
    ) = (
        getitem_456
    ) = (
        getitem_457
    ) = (
        getitem_458
    ) = (
        getitem_459
    ) = (
        getitem_460
    ) = (
        getitem_461
    ) = (
        getitem_462
    ) = (
        getitem_463
    ) = (
        getitem_464
    ) = (
        getitem_465
    ) = (
        getitem_466
    ) = (
        getitem_467
    ) = (
        getitem_468
    ) = getitem_469 = getitem_470 = getitem_471 = getitem_472 = getitem_473 = None
    getitem_474: "f32[][]cuda:0" = _foreach_maximum[0]
    getitem_475: "f32[][]cuda:0" = _foreach_maximum[1]
    getitem_476: "f32[][]cuda:0" = _foreach_maximum[2]
    getitem_477: "f32[][]cuda:0" = _foreach_maximum[3]
    getitem_478: "f32[][]cuda:0" = _foreach_maximum[4]
    getitem_479: "f32[][]cuda:0" = _foreach_maximum[5]
    getitem_480: "f32[][]cuda:0" = _foreach_maximum[6]
    getitem_481: "f32[][]cuda:0" = _foreach_maximum[7]
    getitem_482: "f32[][]cuda:0" = _foreach_maximum[8]
    getitem_483: "f32[][]cuda:0" = _foreach_maximum[9]
    getitem_484: "f32[][]cuda:0" = _foreach_maximum[10]
    getitem_485: "f32[][]cuda:0" = _foreach_maximum[11]
    getitem_486: "f32[][]cuda:0" = _foreach_maximum[12]
    getitem_487: "f32[][]cuda:0" = _foreach_maximum[13]
    getitem_488: "f32[][]cuda:0" = _foreach_maximum[14]
    getitem_489: "f32[][]cuda:0" = _foreach_maximum[15]
    getitem_490: "f32[][]cuda:0" = _foreach_maximum[16]
    getitem_491: "f32[][]cuda:0" = _foreach_maximum[17]
    getitem_492: "f32[][]cuda:0" = _foreach_maximum[18]
    getitem_493: "f32[][]cuda:0" = _foreach_maximum[19]
    getitem_494: "f32[][]cuda:0" = _foreach_maximum[20]
    getitem_495: "f32[][]cuda:0" = _foreach_maximum[21]
    getitem_496: "f32[][]cuda:0" = _foreach_maximum[22]
    getitem_497: "f32[][]cuda:0" = _foreach_maximum[23]
    getitem_498: "f32[][]cuda:0" = _foreach_maximum[24]
    getitem_499: "f32[][]cuda:0" = _foreach_maximum[25]
    getitem_500: "f32[][]cuda:0" = _foreach_maximum[26]
    getitem_501: "f32[][]cuda:0" = _foreach_maximum[27]
    getitem_502: "f32[][]cuda:0" = _foreach_maximum[28]
    getitem_503: "f32[][]cuda:0" = _foreach_maximum[29]
    getitem_504: "f32[][]cuda:0" = _foreach_maximum[30]
    getitem_505: "f32[][]cuda:0" = _foreach_maximum[31]
    getitem_506: "f32[][]cuda:0" = _foreach_maximum[32]
    getitem_507: "f32[][]cuda:0" = _foreach_maximum[33]
    getitem_508: "f32[][]cuda:0" = _foreach_maximum[34]
    getitem_509: "f32[][]cuda:0" = _foreach_maximum[35]
    getitem_510: "f32[][]cuda:0" = _foreach_maximum[36]
    getitem_511: "f32[][]cuda:0" = _foreach_maximum[37]
    getitem_512: "f32[][]cuda:0" = _foreach_maximum[38]
    getitem_513: "f32[][]cuda:0" = _foreach_maximum[39]
    getitem_514: "f32[][]cuda:0" = _foreach_maximum[40]
    getitem_515: "f32[][]cuda:0" = _foreach_maximum[41]
    getitem_516: "f32[][]cuda:0" = _foreach_maximum[42]
    getitem_517: "f32[][]cuda:0" = _foreach_maximum[43]
    getitem_518: "f32[][]cuda:0" = _foreach_maximum[44]
    getitem_519: "f32[][]cuda:0" = _foreach_maximum[45]
    getitem_520: "f32[][]cuda:0" = _foreach_maximum[46]
    getitem_521: "f32[][]cuda:0" = _foreach_maximum[47]
    getitem_522: "f32[][]cuda:0" = _foreach_maximum[48]
    getitem_523: "f32[][]cuda:0" = _foreach_maximum[49]
    getitem_524: "f32[][]cuda:0" = _foreach_maximum[50]
    getitem_525: "f32[][]cuda:0" = _foreach_maximum[51]
    getitem_526: "f32[][]cuda:0" = _foreach_maximum[52]
    getitem_527: "f32[][]cuda:0" = _foreach_maximum[53]
    getitem_528: "f32[][]cuda:0" = _foreach_maximum[54]
    getitem_529: "f32[][]cuda:0" = _foreach_maximum[55]
    getitem_530: "f32[][]cuda:0" = _foreach_maximum[56]
    getitem_531: "f32[][]cuda:0" = _foreach_maximum[57]
    getitem_532: "f32[][]cuda:0" = _foreach_maximum[58]
    getitem_533: "f32[][]cuda:0" = _foreach_maximum[59]
    getitem_534: "f32[][]cuda:0" = _foreach_maximum[60]
    getitem_535: "f32[][]cuda:0" = _foreach_maximum[61]
    getitem_536: "f32[][]cuda:0" = _foreach_maximum[62]
    getitem_537: "f32[][]cuda:0" = _foreach_maximum[63]
    getitem_538: "f32[][]cuda:0" = _foreach_maximum[64]
    getitem_539: "f32[][]cuda:0" = _foreach_maximum[65]
    getitem_540: "f32[][]cuda:0" = _foreach_maximum[66]
    getitem_541: "f32[][]cuda:0" = _foreach_maximum[67]
    getitem_542: "f32[][]cuda:0" = _foreach_maximum[68]
    getitem_543: "f32[][]cuda:0" = _foreach_maximum[69]
    getitem_544: "f32[][]cuda:0" = _foreach_maximum[70]
    getitem_545: "f32[][]cuda:0" = _foreach_maximum[71]
    getitem_546: "f32[][]cuda:0" = _foreach_maximum[72]
    getitem_547: "f32[][]cuda:0" = _foreach_maximum[73]
    getitem_548: "f32[][]cuda:0" = _foreach_maximum[74]
    getitem_549: "f32[][]cuda:0" = _foreach_maximum[75]
    getitem_550: "f32[][]cuda:0" = _foreach_maximum[76]
    getitem_551: "f32[][]cuda:0" = _foreach_maximum[77]
    getitem_552: "f32[][]cuda:0" = _foreach_maximum[78]
    _foreach_maximum = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:242 in _compute_clippy_shrinkage, code: (alphas).repeat(len(masked_blocked_params)),
    repeat: "f32[79][1]cuda:0" = torch.ops.aten.repeat.default(neg, [79])
    neg = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:241 in _compute_clippy_shrinkage, code: minus_lrs = torch.split(
    split = torch.ops.aten.split.Tensor(repeat, 1)
    getitem_553: "f32[1][1]cuda:0" = split[0]
    getitem_554: "f32[1][1]cuda:0" = split[1]
    getitem_555: "f32[1][1]cuda:0" = split[2]
    getitem_556: "f32[1][1]cuda:0" = split[3]
    getitem_557: "f32[1][1]cuda:0" = split[4]
    getitem_558: "f32[1][1]cuda:0" = split[5]
    getitem_559: "f32[1][1]cuda:0" = split[6]
    getitem_560: "f32[1][1]cuda:0" = split[7]
    getitem_561: "f32[1][1]cuda:0" = split[8]
    getitem_562: "f32[1][1]cuda:0" = split[9]
    getitem_563: "f32[1][1]cuda:0" = split[10]
    getitem_564: "f32[1][1]cuda:0" = split[11]
    getitem_565: "f32[1][1]cuda:0" = split[12]
    getitem_566: "f32[1][1]cuda:0" = split[13]
    getitem_567: "f32[1][1]cuda:0" = split[14]
    getitem_568: "f32[1][1]cuda:0" = split[15]
    getitem_569: "f32[1][1]cuda:0" = split[16]
    getitem_570: "f32[1][1]cuda:0" = split[17]
    getitem_571: "f32[1][1]cuda:0" = split[18]
    getitem_572: "f32[1][1]cuda:0" = split[19]
    getitem_573: "f32[1][1]cuda:0" = split[20]
    getitem_574: "f32[1][1]cuda:0" = split[21]
    getitem_575: "f32[1][1]cuda:0" = split[22]
    getitem_576: "f32[1][1]cuda:0" = split[23]
    getitem_577: "f32[1][1]cuda:0" = split[24]
    getitem_578: "f32[1][1]cuda:0" = split[25]
    getitem_579: "f32[1][1]cuda:0" = split[26]
    getitem_580: "f32[1][1]cuda:0" = split[27]
    getitem_581: "f32[1][1]cuda:0" = split[28]
    getitem_582: "f32[1][1]cuda:0" = split[29]
    getitem_583: "f32[1][1]cuda:0" = split[30]
    getitem_584: "f32[1][1]cuda:0" = split[31]
    getitem_585: "f32[1][1]cuda:0" = split[32]
    getitem_586: "f32[1][1]cuda:0" = split[33]
    getitem_587: "f32[1][1]cuda:0" = split[34]
    getitem_588: "f32[1][1]cuda:0" = split[35]
    getitem_589: "f32[1][1]cuda:0" = split[36]
    getitem_590: "f32[1][1]cuda:0" = split[37]
    getitem_591: "f32[1][1]cuda:0" = split[38]
    getitem_592: "f32[1][1]cuda:0" = split[39]
    getitem_593: "f32[1][1]cuda:0" = split[40]
    getitem_594: "f32[1][1]cuda:0" = split[41]
    getitem_595: "f32[1][1]cuda:0" = split[42]
    getitem_596: "f32[1][1]cuda:0" = split[43]
    getitem_597: "f32[1][1]cuda:0" = split[44]
    getitem_598: "f32[1][1]cuda:0" = split[45]
    getitem_599: "f32[1][1]cuda:0" = split[46]
    getitem_600: "f32[1][1]cuda:0" = split[47]
    getitem_601: "f32[1][1]cuda:0" = split[48]
    getitem_602: "f32[1][1]cuda:0" = split[49]
    getitem_603: "f32[1][1]cuda:0" = split[50]
    getitem_604: "f32[1][1]cuda:0" = split[51]
    getitem_605: "f32[1][1]cuda:0" = split[52]
    getitem_606: "f32[1][1]cuda:0" = split[53]
    getitem_607: "f32[1][1]cuda:0" = split[54]
    getitem_608: "f32[1][1]cuda:0" = split[55]
    getitem_609: "f32[1][1]cuda:0" = split[56]
    getitem_610: "f32[1][1]cuda:0" = split[57]
    getitem_611: "f32[1][1]cuda:0" = split[58]
    getitem_612: "f32[1][1]cuda:0" = split[59]
    getitem_613: "f32[1][1]cuda:0" = split[60]
    getitem_614: "f32[1][1]cuda:0" = split[61]
    getitem_615: "f32[1][1]cuda:0" = split[62]
    getitem_616: "f32[1][1]cuda:0" = split[63]
    getitem_617: "f32[1][1]cuda:0" = split[64]
    getitem_618: "f32[1][1]cuda:0" = split[65]
    getitem_619: "f32[1][1]cuda:0" = split[66]
    getitem_620: "f32[1][1]cuda:0" = split[67]
    getitem_621: "f32[1][1]cuda:0" = split[68]
    getitem_622: "f32[1][1]cuda:0" = split[69]
    getitem_623: "f32[1][1]cuda:0" = split[70]
    getitem_624: "f32[1][1]cuda:0" = split[71]
    getitem_625: "f32[1][1]cuda:0" = split[72]
    getitem_626: "f32[1][1]cuda:0" = split[73]
    getitem_627: "f32[1][1]cuda:0" = split[74]
    getitem_628: "f32[1][1]cuda:0" = split[75]
    getitem_629: "f32[1][1]cuda:0" = split[76]
    getitem_630: "f32[1][1]cuda:0" = split[77]
    getitem_631: "f32[1][1]cuda:0" = split[78]
    split = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:245 in _compute_clippy_shrinkage, code: torch._foreach_div_(minus_lrs, masked_blocked_shrinkage)
    _foreach_div_1 = torch.ops.aten._foreach_div.List(
        [
            getitem_553,
            getitem_554,
            getitem_555,
            getitem_556,
            getitem_557,
            getitem_558,
            getitem_559,
            getitem_560,
            getitem_561,
            getitem_562,
            getitem_563,
            getitem_564,
            getitem_565,
            getitem_566,
            getitem_567,
            getitem_568,
            getitem_569,
            getitem_570,
            getitem_571,
            getitem_572,
            getitem_573,
            getitem_574,
            getitem_575,
            getitem_576,
            getitem_577,
            getitem_578,
            getitem_579,
            getitem_580,
            getitem_581,
            getitem_582,
            getitem_583,
            getitem_584,
            getitem_585,
            getitem_586,
            getitem_587,
            getitem_588,
            getitem_589,
            getitem_590,
            getitem_591,
            getitem_592,
            getitem_593,
            getitem_594,
            getitem_595,
            getitem_596,
            getitem_597,
            getitem_598,
            getitem_599,
            getitem_600,
            getitem_601,
            getitem_602,
            getitem_603,
            getitem_604,
            getitem_605,
            getitem_606,
            getitem_607,
            getitem_608,
            getitem_609,
            getitem_610,
            getitem_611,
            getitem_612,
            getitem_613,
            getitem_614,
            getitem_615,
            getitem_616,
            getitem_617,
            getitem_618,
            getitem_619,
            getitem_620,
            getitem_621,
            getitem_622,
            getitem_623,
            getitem_624,
            getitem_625,
            getitem_626,
            getitem_627,
            getitem_628,
            getitem_629,
            getitem_630,
            getitem_631,
        ],
        [
            getitem_474,
            getitem_475,
            getitem_476,
            getitem_477,
            getitem_478,
            getitem_479,
            getitem_480,
            getitem_481,
            getitem_482,
            getitem_483,
            getitem_484,
            getitem_485,
            getitem_486,
            getitem_487,
            getitem_488,
            getitem_489,
            getitem_490,
            getitem_491,
            getitem_492,
            getitem_493,
            getitem_494,
            getitem_495,
            getitem_496,
            getitem_497,
            getitem_498,
            getitem_499,
            getitem_500,
            getitem_501,
            getitem_502,
            getitem_503,
            getitem_504,
            getitem_505,
            getitem_506,
            getitem_507,
            getitem_508,
            getitem_509,
            getitem_510,
            getitem_511,
            getitem_512,
            getitem_513,
            getitem_514,
            getitem_515,
            getitem_516,
            getitem_517,
            getitem_518,
            getitem_519,
            getitem_520,
            getitem_521,
            getitem_522,
            getitem_523,
            getitem_524,
            getitem_525,
            getitem_526,
            getitem_527,
            getitem_528,
            getitem_529,
            getitem_530,
            getitem_531,
            getitem_532,
            getitem_533,
            getitem_534,
            getitem_535,
            getitem_536,
            getitem_537,
            getitem_538,
            getitem_539,
            getitem_540,
            getitem_541,
            getitem_542,
            getitem_543,
            getitem_544,
            getitem_545,
            getitem_546,
            getitem_547,
            getitem_548,
            getitem_549,
            getitem_550,
            getitem_551,
            getitem_552,
        ],
    )
    getitem_553 = (
        getitem_554
    ) = (
        getitem_555
    ) = (
        getitem_556
    ) = (
        getitem_557
    ) = (
        getitem_558
    ) = (
        getitem_559
    ) = (
        getitem_560
    ) = (
        getitem_561
    ) = (
        getitem_562
    ) = (
        getitem_563
    ) = (
        getitem_564
    ) = (
        getitem_565
    ) = (
        getitem_566
    ) = (
        getitem_567
    ) = (
        getitem_568
    ) = (
        getitem_569
    ) = (
        getitem_570
    ) = (
        getitem_571
    ) = (
        getitem_572
    ) = (
        getitem_573
    ) = (
        getitem_574
    ) = (
        getitem_575
    ) = (
        getitem_576
    ) = (
        getitem_577
    ) = (
        getitem_578
    ) = (
        getitem_579
    ) = (
        getitem_580
    ) = (
        getitem_581
    ) = (
        getitem_582
    ) = (
        getitem_583
    ) = (
        getitem_584
    ) = (
        getitem_585
    ) = (
        getitem_586
    ) = (
        getitem_587
    ) = (
        getitem_588
    ) = (
        getitem_589
    ) = (
        getitem_590
    ) = (
        getitem_591
    ) = (
        getitem_592
    ) = (
        getitem_593
    ) = (
        getitem_594
    ) = (
        getitem_595
    ) = (
        getitem_596
    ) = (
        getitem_597
    ) = (
        getitem_598
    ) = (
        getitem_599
    ) = (
        getitem_600
    ) = (
        getitem_601
    ) = (
        getitem_602
    ) = (
        getitem_603
    ) = (
        getitem_604
    ) = (
        getitem_605
    ) = (
        getitem_606
    ) = (
        getitem_607
    ) = (
        getitem_608
    ) = (
        getitem_609
    ) = (
        getitem_610
    ) = (
        getitem_611
    ) = (
        getitem_612
    ) = (
        getitem_613
    ) = (
        getitem_614
    ) = (
        getitem_615
    ) = (
        getitem_616
    ) = (
        getitem_617
    ) = (
        getitem_618
    ) = (
        getitem_619
    ) = (
        getitem_620
    ) = (
        getitem_621
    ) = (
        getitem_622
    ) = (
        getitem_623
    ) = (
        getitem_624
    ) = (
        getitem_625
    ) = (
        getitem_626
    ) = (
        getitem_627
    ) = (
        getitem_628
    ) = (
        getitem_629
    ) = (
        getitem_630
    ) = (
        getitem_631
    ) = (
        getitem_474
    ) = (
        getitem_475
    ) = (
        getitem_476
    ) = (
        getitem_477
    ) = (
        getitem_478
    ) = (
        getitem_479
    ) = (
        getitem_480
    ) = (
        getitem_481
    ) = (
        getitem_482
    ) = (
        getitem_483
    ) = (
        getitem_484
    ) = (
        getitem_485
    ) = (
        getitem_486
    ) = (
        getitem_487
    ) = (
        getitem_488
    ) = (
        getitem_489
    ) = (
        getitem_490
    ) = (
        getitem_491
    ) = (
        getitem_492
    ) = (
        getitem_493
    ) = (
        getitem_494
    ) = (
        getitem_495
    ) = (
        getitem_496
    ) = (
        getitem_497
    ) = (
        getitem_498
    ) = (
        getitem_499
    ) = (
        getitem_500
    ) = (
        getitem_501
    ) = (
        getitem_502
    ) = (
        getitem_503
    ) = (
        getitem_504
    ) = (
        getitem_505
    ) = (
        getitem_506
    ) = (
        getitem_507
    ) = (
        getitem_508
    ) = (
        getitem_509
    ) = (
        getitem_510
    ) = (
        getitem_511
    ) = (
        getitem_512
    ) = (
        getitem_513
    ) = (
        getitem_514
    ) = (
        getitem_515
    ) = (
        getitem_516
    ) = (
        getitem_517
    ) = (
        getitem_518
    ) = (
        getitem_519
    ) = (
        getitem_520
    ) = (
        getitem_521
    ) = (
        getitem_522
    ) = (
        getitem_523
    ) = (
        getitem_524
    ) = (
        getitem_525
    ) = (
        getitem_526
    ) = (
        getitem_527
    ) = (
        getitem_528
    ) = (
        getitem_529
    ) = (
        getitem_530
    ) = (
        getitem_531
    ) = (
        getitem_532
    ) = (
        getitem_533
    ) = (
        getitem_534
    ) = (
        getitem_535
    ) = (
        getitem_536
    ) = (
        getitem_537
    ) = (
        getitem_538
    ) = (
        getitem_539
    ) = (
        getitem_540
    ) = (
        getitem_541
    ) = (
        getitem_542
    ) = (
        getitem_543
    ) = (
        getitem_544
    ) = (
        getitem_545
    ) = (
        getitem_546
    ) = (
        getitem_547
    ) = getitem_548 = getitem_549 = getitem_550 = getitem_551 = getitem_552 = None
    getitem_632: "f32[1][1]cuda:0" = _foreach_div_1[0]
    getitem_633: "f32[1][1]cuda:0" = _foreach_div_1[1]
    getitem_634: "f32[1][1]cuda:0" = _foreach_div_1[2]
    getitem_635: "f32[1][1]cuda:0" = _foreach_div_1[3]
    getitem_636: "f32[1][1]cuda:0" = _foreach_div_1[4]
    getitem_637: "f32[1][1]cuda:0" = _foreach_div_1[5]
    getitem_638: "f32[1][1]cuda:0" = _foreach_div_1[6]
    getitem_639: "f32[1][1]cuda:0" = _foreach_div_1[7]
    getitem_640: "f32[1][1]cuda:0" = _foreach_div_1[8]
    getitem_641: "f32[1][1]cuda:0" = _foreach_div_1[9]
    getitem_642: "f32[1][1]cuda:0" = _foreach_div_1[10]
    getitem_643: "f32[1][1]cuda:0" = _foreach_div_1[11]
    getitem_644: "f32[1][1]cuda:0" = _foreach_div_1[12]
    getitem_645: "f32[1][1]cuda:0" = _foreach_div_1[13]
    getitem_646: "f32[1][1]cuda:0" = _foreach_div_1[14]
    getitem_647: "f32[1][1]cuda:0" = _foreach_div_1[15]
    getitem_648: "f32[1][1]cuda:0" = _foreach_div_1[16]
    getitem_649: "f32[1][1]cuda:0" = _foreach_div_1[17]
    getitem_650: "f32[1][1]cuda:0" = _foreach_div_1[18]
    getitem_651: "f32[1][1]cuda:0" = _foreach_div_1[19]
    getitem_652: "f32[1][1]cuda:0" = _foreach_div_1[20]
    getitem_653: "f32[1][1]cuda:0" = _foreach_div_1[21]
    getitem_654: "f32[1][1]cuda:0" = _foreach_div_1[22]
    getitem_655: "f32[1][1]cuda:0" = _foreach_div_1[23]
    getitem_656: "f32[1][1]cuda:0" = _foreach_div_1[24]
    getitem_657: "f32[1][1]cuda:0" = _foreach_div_1[25]
    getitem_658: "f32[1][1]cuda:0" = _foreach_div_1[26]
    getitem_659: "f32[1][1]cuda:0" = _foreach_div_1[27]
    getitem_660: "f32[1][1]cuda:0" = _foreach_div_1[28]
    getitem_661: "f32[1][1]cuda:0" = _foreach_div_1[29]
    getitem_662: "f32[1][1]cuda:0" = _foreach_div_1[30]
    getitem_663: "f32[1][1]cuda:0" = _foreach_div_1[31]
    getitem_664: "f32[1][1]cuda:0" = _foreach_div_1[32]
    getitem_665: "f32[1][1]cuda:0" = _foreach_div_1[33]
    getitem_666: "f32[1][1]cuda:0" = _foreach_div_1[34]
    getitem_667: "f32[1][1]cuda:0" = _foreach_div_1[35]
    getitem_668: "f32[1][1]cuda:0" = _foreach_div_1[36]
    getitem_669: "f32[1][1]cuda:0" = _foreach_div_1[37]
    getitem_670: "f32[1][1]cuda:0" = _foreach_div_1[38]
    getitem_671: "f32[1][1]cuda:0" = _foreach_div_1[39]
    getitem_672: "f32[1][1]cuda:0" = _foreach_div_1[40]
    getitem_673: "f32[1][1]cuda:0" = _foreach_div_1[41]
    getitem_674: "f32[1][1]cuda:0" = _foreach_div_1[42]
    getitem_675: "f32[1][1]cuda:0" = _foreach_div_1[43]
    getitem_676: "f32[1][1]cuda:0" = _foreach_div_1[44]
    getitem_677: "f32[1][1]cuda:0" = _foreach_div_1[45]
    getitem_678: "f32[1][1]cuda:0" = _foreach_div_1[46]
    getitem_679: "f32[1][1]cuda:0" = _foreach_div_1[47]
    getitem_680: "f32[1][1]cuda:0" = _foreach_div_1[48]
    getitem_681: "f32[1][1]cuda:0" = _foreach_div_1[49]
    getitem_682: "f32[1][1]cuda:0" = _foreach_div_1[50]
    getitem_683: "f32[1][1]cuda:0" = _foreach_div_1[51]
    getitem_684: "f32[1][1]cuda:0" = _foreach_div_1[52]
    getitem_685: "f32[1][1]cuda:0" = _foreach_div_1[53]
    getitem_686: "f32[1][1]cuda:0" = _foreach_div_1[54]
    getitem_687: "f32[1][1]cuda:0" = _foreach_div_1[55]
    getitem_688: "f32[1][1]cuda:0" = _foreach_div_1[56]
    getitem_689: "f32[1][1]cuda:0" = _foreach_div_1[57]
    getitem_690: "f32[1][1]cuda:0" = _foreach_div_1[58]
    getitem_691: "f32[1][1]cuda:0" = _foreach_div_1[59]
    getitem_692: "f32[1][1]cuda:0" = _foreach_div_1[60]
    getitem_693: "f32[1][1]cuda:0" = _foreach_div_1[61]
    getitem_694: "f32[1][1]cuda:0" = _foreach_div_1[62]
    getitem_695: "f32[1][1]cuda:0" = _foreach_div_1[63]
    getitem_696: "f32[1][1]cuda:0" = _foreach_div_1[64]
    getitem_697: "f32[1][1]cuda:0" = _foreach_div_1[65]
    getitem_698: "f32[1][1]cuda:0" = _foreach_div_1[66]
    getitem_699: "f32[1][1]cuda:0" = _foreach_div_1[67]
    getitem_700: "f32[1][1]cuda:0" = _foreach_div_1[68]
    getitem_701: "f32[1][1]cuda:0" = _foreach_div_1[69]
    getitem_702: "f32[1][1]cuda:0" = _foreach_div_1[70]
    getitem_703: "f32[1][1]cuda:0" = _foreach_div_1[71]
    getitem_704: "f32[1][1]cuda:0" = _foreach_div_1[72]
    getitem_705: "f32[1][1]cuda:0" = _foreach_div_1[73]
    getitem_706: "f32[1][1]cuda:0" = _foreach_div_1[74]
    getitem_707: "f32[1][1]cuda:0" = _foreach_div_1[75]
    getitem_708: "f32[1][1]cuda:0" = _foreach_div_1[76]
    getitem_709: "f32[1][1]cuda:0" = _foreach_div_1[77]
    getitem_710: "f32[1][1]cuda:0" = _foreach_div_1[78]
    _foreach_div_1 = None
    slice_scatter: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        repeat, getitem_632, 0, 0, 1
    )
    repeat = getitem_632 = None
    slice_scatter_1: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter, getitem_633, 0, 1, 2
    )
    slice_scatter = getitem_633 = None
    slice_scatter_2: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_1, getitem_634, 0, 2, 3
    )
    slice_scatter_1 = getitem_634 = None
    slice_scatter_3: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_2, getitem_635, 0, 3, 4
    )
    slice_scatter_2 = getitem_635 = None
    slice_scatter_4: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_3, getitem_636, 0, 4, 5
    )
    slice_scatter_3 = getitem_636 = None
    slice_scatter_5: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_4, getitem_637, 0, 5, 6
    )
    slice_scatter_4 = getitem_637 = None
    slice_scatter_6: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_5, getitem_638, 0, 6, 7
    )
    slice_scatter_5 = getitem_638 = None
    slice_scatter_7: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_6, getitem_639, 0, 7, 8
    )
    slice_scatter_6 = getitem_639 = None
    slice_scatter_8: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_7, getitem_640, 0, 8, 9
    )
    slice_scatter_7 = getitem_640 = None
    slice_scatter_9: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_8, getitem_641, 0, 9, 10
    )
    slice_scatter_8 = getitem_641 = None
    slice_scatter_10: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_9, getitem_642, 0, 10, 11
    )
    slice_scatter_9 = getitem_642 = None
    slice_scatter_11: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_10, getitem_643, 0, 11, 12
    )
    slice_scatter_10 = getitem_643 = None
    slice_scatter_12: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_11, getitem_644, 0, 12, 13
    )
    slice_scatter_11 = getitem_644 = None
    slice_scatter_13: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_12, getitem_645, 0, 13, 14
    )
    slice_scatter_12 = getitem_645 = None
    slice_scatter_14: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_13, getitem_646, 0, 14, 15
    )
    slice_scatter_13 = getitem_646 = None
    slice_scatter_15: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_14, getitem_647, 0, 15, 16
    )
    slice_scatter_14 = getitem_647 = None
    slice_scatter_16: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_15, getitem_648, 0, 16, 17
    )
    slice_scatter_15 = getitem_648 = None
    slice_scatter_17: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_16, getitem_649, 0, 17, 18
    )
    slice_scatter_16 = getitem_649 = None
    slice_scatter_18: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_17, getitem_650, 0, 18, 19
    )
    slice_scatter_17 = getitem_650 = None
    slice_scatter_19: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_18, getitem_651, 0, 19, 20
    )
    slice_scatter_18 = getitem_651 = None
    slice_scatter_20: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_19, getitem_652, 0, 20, 21
    )
    slice_scatter_19 = getitem_652 = None
    slice_scatter_21: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_20, getitem_653, 0, 21, 22
    )
    slice_scatter_20 = getitem_653 = None
    slice_scatter_22: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_21, getitem_654, 0, 22, 23
    )
    slice_scatter_21 = getitem_654 = None
    slice_scatter_23: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_22, getitem_655, 0, 23, 24
    )
    slice_scatter_22 = getitem_655 = None
    slice_scatter_24: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_23, getitem_656, 0, 24, 25
    )
    slice_scatter_23 = getitem_656 = None
    slice_scatter_25: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_24, getitem_657, 0, 25, 26
    )
    slice_scatter_24 = getitem_657 = None
    slice_scatter_26: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_25, getitem_658, 0, 26, 27
    )
    slice_scatter_25 = getitem_658 = None
    slice_scatter_27: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_26, getitem_659, 0, 27, 28
    )
    slice_scatter_26 = getitem_659 = None
    slice_scatter_28: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_27, getitem_660, 0, 28, 29
    )
    slice_scatter_27 = getitem_660 = None
    slice_scatter_29: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_28, getitem_661, 0, 29, 30
    )
    slice_scatter_28 = getitem_661 = None
    slice_scatter_30: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_29, getitem_662, 0, 30, 31
    )
    slice_scatter_29 = getitem_662 = None
    slice_scatter_31: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_30, getitem_663, 0, 31, 32
    )
    slice_scatter_30 = getitem_663 = None
    slice_scatter_32: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_31, getitem_664, 0, 32, 33
    )
    slice_scatter_31 = getitem_664 = None
    slice_scatter_33: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_32, getitem_665, 0, 33, 34
    )
    slice_scatter_32 = getitem_665 = None
    slice_scatter_34: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_33, getitem_666, 0, 34, 35
    )
    slice_scatter_33 = getitem_666 = None
    slice_scatter_35: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_34, getitem_667, 0, 35, 36
    )
    slice_scatter_34 = getitem_667 = None
    slice_scatter_36: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_35, getitem_668, 0, 36, 37
    )
    slice_scatter_35 = getitem_668 = None
    slice_scatter_37: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_36, getitem_669, 0, 37, 38
    )
    slice_scatter_36 = getitem_669 = None
    slice_scatter_38: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_37, getitem_670, 0, 38, 39
    )
    slice_scatter_37 = getitem_670 = None
    slice_scatter_39: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_38, getitem_671, 0, 39, 40
    )
    slice_scatter_38 = getitem_671 = None
    slice_scatter_40: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_39, getitem_672, 0, 40, 41
    )
    slice_scatter_39 = getitem_672 = None
    slice_scatter_41: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_40, getitem_673, 0, 41, 42
    )
    slice_scatter_40 = getitem_673 = None
    slice_scatter_42: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_41, getitem_674, 0, 42, 43
    )
    slice_scatter_41 = getitem_674 = None
    slice_scatter_43: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_42, getitem_675, 0, 43, 44
    )
    slice_scatter_42 = getitem_675 = None
    slice_scatter_44: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_43, getitem_676, 0, 44, 45
    )
    slice_scatter_43 = getitem_676 = None
    slice_scatter_45: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_44, getitem_677, 0, 45, 46
    )
    slice_scatter_44 = getitem_677 = None
    slice_scatter_46: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_45, getitem_678, 0, 46, 47
    )
    slice_scatter_45 = getitem_678 = None
    slice_scatter_47: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_46, getitem_679, 0, 47, 48
    )
    slice_scatter_46 = getitem_679 = None
    slice_scatter_48: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_47, getitem_680, 0, 48, 49
    )
    slice_scatter_47 = getitem_680 = None
    slice_scatter_49: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_48, getitem_681, 0, 49, 50
    )
    slice_scatter_48 = getitem_681 = None
    slice_scatter_50: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_49, getitem_682, 0, 50, 51
    )
    slice_scatter_49 = getitem_682 = None
    slice_scatter_51: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_50, getitem_683, 0, 51, 52
    )
    slice_scatter_50 = getitem_683 = None
    slice_scatter_52: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_51, getitem_684, 0, 52, 53
    )
    slice_scatter_51 = getitem_684 = None
    slice_scatter_53: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_52, getitem_685, 0, 53, 54
    )
    slice_scatter_52 = getitem_685 = None
    slice_scatter_54: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_53, getitem_686, 0, 54, 55
    )
    slice_scatter_53 = getitem_686 = None
    slice_scatter_55: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_54, getitem_687, 0, 55, 56
    )
    slice_scatter_54 = getitem_687 = None
    slice_scatter_56: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_55, getitem_688, 0, 56, 57
    )
    slice_scatter_55 = getitem_688 = None
    slice_scatter_57: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_56, getitem_689, 0, 57, 58
    )
    slice_scatter_56 = getitem_689 = None
    slice_scatter_58: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_57, getitem_690, 0, 58, 59
    )
    slice_scatter_57 = getitem_690 = None
    slice_scatter_59: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_58, getitem_691, 0, 59, 60
    )
    slice_scatter_58 = getitem_691 = None
    slice_scatter_60: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_59, getitem_692, 0, 60, 61
    )
    slice_scatter_59 = getitem_692 = None
    slice_scatter_61: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_60, getitem_693, 0, 61, 62
    )
    slice_scatter_60 = getitem_693 = None
    slice_scatter_62: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_61, getitem_694, 0, 62, 63
    )
    slice_scatter_61 = getitem_694 = None
    slice_scatter_63: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_62, getitem_695, 0, 63, 64
    )
    slice_scatter_62 = getitem_695 = None
    slice_scatter_64: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_63, getitem_696, 0, 64, 65
    )
    slice_scatter_63 = getitem_696 = None
    slice_scatter_65: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_64, getitem_697, 0, 65, 66
    )
    slice_scatter_64 = getitem_697 = None
    slice_scatter_66: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_65, getitem_698, 0, 66, 67
    )
    slice_scatter_65 = getitem_698 = None
    slice_scatter_67: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_66, getitem_699, 0, 67, 68
    )
    slice_scatter_66 = getitem_699 = None
    slice_scatter_68: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_67, getitem_700, 0, 68, 69
    )
    slice_scatter_67 = getitem_700 = None
    slice_scatter_69: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_68, getitem_701, 0, 69, 70
    )
    slice_scatter_68 = getitem_701 = None
    slice_scatter_70: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_69, getitem_702, 0, 70, 71
    )
    slice_scatter_69 = getitem_702 = None
    slice_scatter_71: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_70, getitem_703, 0, 71, 72
    )
    slice_scatter_70 = getitem_703 = None
    slice_scatter_72: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_71, getitem_704, 0, 72, 73
    )
    slice_scatter_71 = getitem_704 = None
    slice_scatter_73: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_72, getitem_705, 0, 73, 74
    )
    slice_scatter_72 = getitem_705 = None
    slice_scatter_74: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_73, getitem_706, 0, 74, 75
    )
    slice_scatter_73 = getitem_706 = None
    slice_scatter_75: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_74, getitem_707, 0, 75, 76
    )
    slice_scatter_74 = getitem_707 = None
    slice_scatter_76: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_75, getitem_708, 0, 76, 77
    )
    slice_scatter_75 = getitem_708 = None
    slice_scatter_77: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_76, getitem_709, 0, 77, 78
    )
    slice_scatter_76 = getitem_709 = None
    slice_scatter_78: "f32[79][1]cuda:0" = torch.ops.aten.slice_scatter.default(
        slice_scatter_77, getitem_710, 0, 78, 79
    )
    slice_scatter_77 = getitem_710 = None
    split_1 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_711: "f32[1][1]cuda:0" = split_1[0]
    split_1 = None
    split_2 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_791: "f32[1][1]cuda:0" = split_2[1]
    split_2 = None
    split_3 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_871: "f32[1][1]cuda:0" = split_3[2]
    split_3 = None
    split_4 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_951: "f32[1][1]cuda:0" = split_4[3]
    split_4 = None
    split_5 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1031: "f32[1][1]cuda:0" = split_5[4]
    split_5 = None
    split_6 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1111: "f32[1][1]cuda:0" = split_6[5]
    split_6 = None
    split_7 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1191: "f32[1][1]cuda:0" = split_7[6]
    split_7 = None
    split_8 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1271: "f32[1][1]cuda:0" = split_8[7]
    split_8 = None
    split_9 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1351: "f32[1][1]cuda:0" = split_9[8]
    split_9 = None
    split_10 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1431: "f32[1][1]cuda:0" = split_10[9]
    split_10 = None
    split_11 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1511: "f32[1][1]cuda:0" = split_11[10]
    split_11 = None
    split_12 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1591: "f32[1][1]cuda:0" = split_12[11]
    split_12 = None
    split_13 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1671: "f32[1][1]cuda:0" = split_13[12]
    split_13 = None
    split_14 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1751: "f32[1][1]cuda:0" = split_14[13]
    split_14 = None
    split_15 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1831: "f32[1][1]cuda:0" = split_15[14]
    split_15 = None
    split_16 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1911: "f32[1][1]cuda:0" = split_16[15]
    split_16 = None
    split_17 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_1991: "f32[1][1]cuda:0" = split_17[16]
    split_17 = None
    split_18 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2071: "f32[1][1]cuda:0" = split_18[17]
    split_18 = None
    split_19 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2151: "f32[1][1]cuda:0" = split_19[18]
    split_19 = None
    split_20 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2231: "f32[1][1]cuda:0" = split_20[19]
    split_20 = None
    split_21 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2311: "f32[1][1]cuda:0" = split_21[20]
    split_21 = None
    split_22 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2391: "f32[1][1]cuda:0" = split_22[21]
    split_22 = None
    split_23 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2471: "f32[1][1]cuda:0" = split_23[22]
    split_23 = None
    split_24 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2551: "f32[1][1]cuda:0" = split_24[23]
    split_24 = None
    split_25 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2631: "f32[1][1]cuda:0" = split_25[24]
    split_25 = None
    split_26 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2711: "f32[1][1]cuda:0" = split_26[25]
    split_26 = None
    split_27 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2791: "f32[1][1]cuda:0" = split_27[26]
    split_27 = None
    split_28 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2871: "f32[1][1]cuda:0" = split_28[27]
    split_28 = None
    split_29 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_2951: "f32[1][1]cuda:0" = split_29[28]
    split_29 = None
    split_30 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3031: "f32[1][1]cuda:0" = split_30[29]
    split_30 = None
    split_31 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3111: "f32[1][1]cuda:0" = split_31[30]
    split_31 = None
    split_32 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3191: "f32[1][1]cuda:0" = split_32[31]
    split_32 = None
    split_33 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3271: "f32[1][1]cuda:0" = split_33[32]
    split_33 = None
    split_34 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3351: "f32[1][1]cuda:0" = split_34[33]
    split_34 = None
    split_35 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3431: "f32[1][1]cuda:0" = split_35[34]
    split_35 = None
    split_36 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3511: "f32[1][1]cuda:0" = split_36[35]
    split_36 = None
    split_37 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3591: "f32[1][1]cuda:0" = split_37[36]
    split_37 = None
    split_38 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3671: "f32[1][1]cuda:0" = split_38[37]
    split_38 = None
    split_39 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3751: "f32[1][1]cuda:0" = split_39[38]
    split_39 = None
    split_40 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3831: "f32[1][1]cuda:0" = split_40[39]
    split_40 = None
    split_41 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3911: "f32[1][1]cuda:0" = split_41[40]
    split_41 = None
    split_42 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_3991: "f32[1][1]cuda:0" = split_42[41]
    split_42 = None
    split_43 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4071: "f32[1][1]cuda:0" = split_43[42]
    split_43 = None
    split_44 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4151: "f32[1][1]cuda:0" = split_44[43]
    split_44 = None
    split_45 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4231: "f32[1][1]cuda:0" = split_45[44]
    split_45 = None
    split_46 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4311: "f32[1][1]cuda:0" = split_46[45]
    split_46 = None
    split_47 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4391: "f32[1][1]cuda:0" = split_47[46]
    split_47 = None
    split_48 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4471: "f32[1][1]cuda:0" = split_48[47]
    split_48 = None
    split_49 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4551: "f32[1][1]cuda:0" = split_49[48]
    split_49 = None
    split_50 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4631: "f32[1][1]cuda:0" = split_50[49]
    split_50 = None
    split_51 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4711: "f32[1][1]cuda:0" = split_51[50]
    split_51 = None
    split_52 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4791: "f32[1][1]cuda:0" = split_52[51]
    split_52 = None
    split_53 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4871: "f32[1][1]cuda:0" = split_53[52]
    split_53 = None
    split_54 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_4951: "f32[1][1]cuda:0" = split_54[53]
    split_54 = None
    split_55 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5031: "f32[1][1]cuda:0" = split_55[54]
    split_55 = None
    split_56 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5111: "f32[1][1]cuda:0" = split_56[55]
    split_56 = None
    split_57 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5191: "f32[1][1]cuda:0" = split_57[56]
    split_57 = None
    split_58 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5271: "f32[1][1]cuda:0" = split_58[57]
    split_58 = None
    split_59 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5351: "f32[1][1]cuda:0" = split_59[58]
    split_59 = None
    split_60 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5431: "f32[1][1]cuda:0" = split_60[59]
    split_60 = None
    split_61 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5511: "f32[1][1]cuda:0" = split_61[60]
    split_61 = None
    split_62 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5591: "f32[1][1]cuda:0" = split_62[61]
    split_62 = None
    split_63 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5671: "f32[1][1]cuda:0" = split_63[62]
    split_63 = None
    split_64 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5751: "f32[1][1]cuda:0" = split_64[63]
    split_64 = None
    split_65 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5831: "f32[1][1]cuda:0" = split_65[64]
    split_65 = None
    split_66 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5911: "f32[1][1]cuda:0" = split_66[65]
    split_66 = None
    split_67 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_5991: "f32[1][1]cuda:0" = split_67[66]
    split_67 = None
    split_68 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6071: "f32[1][1]cuda:0" = split_68[67]
    split_68 = None
    split_69 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6151: "f32[1][1]cuda:0" = split_69[68]
    split_69 = None
    split_70 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6231: "f32[1][1]cuda:0" = split_70[69]
    split_70 = None
    split_71 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6311: "f32[1][1]cuda:0" = split_71[70]
    split_71 = None
    split_72 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6391: "f32[1][1]cuda:0" = split_72[71]
    split_72 = None
    split_73 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6471: "f32[1][1]cuda:0" = split_73[72]
    split_73 = None
    split_74 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6551: "f32[1][1]cuda:0" = split_74[73]
    split_74 = None
    split_75 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6631: "f32[1][1]cuda:0" = split_75[74]
    split_75 = None
    split_76 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6711: "f32[1][1]cuda:0" = split_76[75]
    split_76 = None
    split_77 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6791: "f32[1][1]cuda:0" = split_77[76]
    split_77 = None
    split_78 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    getitem_6871: "f32[1][1]cuda:0" = split_78[77]
    split_78 = None
    split_79 = torch.ops.aten.split.Tensor(slice_scatter_78, 1)
    slice_scatter_78 = None
    getitem_6951: "f32[1][1]cuda:0" = split_79[78]
    split_79 = None

    # File: <torch_package_0>.hpc/optimizers/distributed_shampoo/prod/distributed_shampoo.py:824 in _apply_decoupled_weight_decay, code: torch._foreach_add_(
    _foreach_add_1 = torch.ops.aten._foreach_add.List(
        [
            arg1_1,
            arg2_1,
            arg3_1,
            arg4_1,
            arg5_1,
            arg6_1,
            arg7_1,
            arg8_1,
            arg9_1,
            arg10_1,
            arg11_1,
            arg12_1,
            arg13_1,
            arg14_1,
            arg15_1,
            arg16_1,
            arg17_1,
            arg18_1,
            arg19_1,
            arg20_1,
            arg21_1,
            arg22_1,
            arg23_1,
            arg24_1,
            arg25_1,
            arg26_1,
            arg27_1,
            arg28_1,
            arg29_1,
            arg30_1,
            arg31_1,
            arg32_1,
            arg33_1,
            arg34_1,
            arg35_1,
            arg36_1,
            arg37_1,
            arg38_1,
            arg39_1,
            arg40_1,
            arg41_1,
            arg42_1,
            arg43_1,
            arg44_1,
            arg45_1,
            arg46_1,
            arg47_1,
            arg48_1,
            arg49_1,
            arg50_1,
            arg51_1,
            arg52_1,
            arg53_1,
            arg54_1,
            arg55_1,
            arg56_1,
            arg57_1,
            arg58_1,
            arg59_1,
            arg60_1,
            arg61_1,
            arg62_1,
            arg63_1,
            arg64_1,
            arg65_1,
            arg66_1,
            arg67_1,
            arg68_1,
            arg69_1,
            arg70_1,
            arg71_1,
            arg72_1,
            arg73_1,
            arg74_1,
            arg75_1,
            arg76_1,
            arg77_1,
            arg78_1,
            arg79_1,
        ],
        [
            arg80_1,
            arg81_1,
            arg82_1,
            arg83_1,
            arg84_1,
            arg85_1,
            arg86_1,
            arg87_1,
            arg88_1,
            arg89_1,
            arg90_1,
            arg91_1,
            arg92_1,
            arg93_1,
            arg94_1,
            arg95_1,
            arg96_1,
            arg97_1,
            arg98_1,
            arg99_1,
            arg100_1,
            arg101_1,
            arg102_1,
            arg103_1,
            arg104_1,
            arg105_1,
            arg106_1,
            arg107_1,
            arg108_1,
            arg109_1,
            arg110_1,
            arg111_1,
            arg112_1,
            arg113_1,
            arg114_1,
            arg115_1,
            arg116_1,
            arg117_1,
            arg118_1,
            arg119_1,
            arg120_1,
            arg121_1,
            arg122_1,
            arg123_1,
            arg124_1,
            arg125_1,
            arg126_1,
            arg127_1,
            arg128_1,
            arg129_1,
            arg130_1,
            arg131_1,
            arg132_1,
            arg133_1,
            arg134_1,
            arg135_1,
            arg136_1,
            arg137_1,
            arg138_1,
            arg139_1,
            arg140_1,
            arg141_1,
            arg142_1,
            arg143_1,
            arg144_1,
            arg145_1,
            arg146_1,
            arg147_1,
            arg148_1,
            arg149_1,
            arg150_1,
            arg151_1,
            arg152_1,
            arg153_1,
            arg154_1,
            arg155_1,
            arg156_1,
            arg157_1,
            arg158_1,
        ],
        alpha=1e-05,
    )
    arg80_1 = (
        arg81_1
    ) = (
        arg82_1
    ) = (
        arg83_1
    ) = (
        arg84_1
    ) = (
        arg85_1
    ) = (
        arg86_1
    ) = (
        arg87_1
    ) = (
        arg88_1
    ) = (
        arg89_1
    ) = (
        arg90_1
    ) = (
        arg91_1
    ) = (
        arg92_1
    ) = (
        arg93_1
    ) = (
        arg94_1
    ) = (
        arg95_1
    ) = (
        arg96_1
    ) = (
        arg97_1
    ) = (
        arg98_1
    ) = (
        arg99_1
    ) = (
        arg100_1
    ) = (
        arg101_1
    ) = (
        arg102_1
    ) = (
        arg103_1
    ) = (
        arg104_1
    ) = (
        arg105_1
    ) = (
        arg106_1
    ) = (
        arg107_1
    ) = (
        arg108_1
    ) = (
        arg109_1
    ) = (
        arg110_1
    ) = (
        arg111_1
    ) = (
        arg112_1
    ) = (
        arg113_1
    ) = (
        arg114_1
    ) = (
        arg115_1
    ) = (
        arg116_1
    ) = (
        arg117_1
    ) = (
        arg118_1
    ) = (
        arg119_1
    ) = (
        arg120_1
    ) = (
        arg121_1
    ) = (
        arg122_1
    ) = (
        arg123_1
    ) = (
        arg124_1
    ) = (
        arg125_1
    ) = (
        arg126_1
    ) = (
        arg127_1
    ) = (
        arg128_1
    ) = (
        arg129_1
    ) = (
        arg130_1
    ) = (
        arg131_1
    ) = (
        arg132_1
    ) = (
        arg133_1
    ) = (
        arg134_1
    ) = (
        arg135_1
    ) = (
        arg136_1
    ) = (
        arg137_1
    ) = (
        arg138_1
    ) = (
        arg139_1
    ) = (
        arg140_1
    ) = (
        arg141_1
    ) = (
        arg142_1
    ) = (
        arg143_1
    ) = (
        arg144_1
    ) = (
        arg145_1
    ) = (
        arg146_1
    ) = (
        arg147_1
    ) = (
        arg148_1
    ) = (
        arg149_1
    ) = (
        arg150_1
    ) = (
        arg151_1
    ) = (
        arg152_1
    ) = arg153_1 = arg154_1 = arg155_1 = arg156_1 = arg157_1 = arg158_1 = None
    getitem_6952: "f32[50][1]cuda:0" = _foreach_add_1[0]
    getitem_6953: "f32[23][1]cuda:0" = _foreach_add_1[1]
    getitem_6954: "f32[38][1]cuda:0" = _foreach_add_1[2]
    getitem_6955: "f32[5][1]cuda:0" = _foreach_add_1[3]
    getitem_6956: "f32[100][1]cuda:0" = _foreach_add_1[4]
    getitem_6957: "f32[50][1]cuda:0" = _foreach_add_1[5]
    getitem_6958: "f32[77][1]cuda:0" = _foreach_add_1[6]
    getitem_6959: "f32[100][1]cuda:0" = _foreach_add_1[7]
    getitem_6960: "f32[100][1]cuda:0" = _foreach_add_1[8]
    getitem_6961: "f32[96][1]cuda:0" = _foreach_add_1[9]
    getitem_6962: "f32[78][1]cuda:0" = _foreach_add_1[10]
    getitem_6963: "f32[100][1]cuda:0" = _foreach_add_1[11]
    getitem_6964: "f32[100][1]cuda:0" = _foreach_add_1[12]
    getitem_6965: "f32[97][1]cuda:0" = _foreach_add_1[13]
    getitem_6966: "f32[819, 732][732, 1]cuda:0" = _foreach_add_1[14]
    getitem_6967: "f32[204][1]cuda:0" = _foreach_add_1[15]
    getitem_6968: "f32[64][1]cuda:0" = _foreach_add_1[16]
    getitem_6969: "f32[204][1]cuda:0" = _foreach_add_1[17]
    getitem_6970: "f32[64, 204][204, 1]cuda:0" = _foreach_add_1[18]
    getitem_6971: "f32[204][1]cuda:0" = _foreach_add_1[19]
    getitem_6972: "f32[204, 160][160, 1]cuda:0" = _foreach_add_1[20]
    getitem_6973: "f32[204][1]cuda:0" = _foreach_add_1[21]
    getitem_6974: "f32[64][1]cuda:0" = _foreach_add_1[22]
    getitem_6975: "f32[204][1]cuda:0" = _foreach_add_1[23]
    getitem_6976: "f32[64, 204][204, 1]cuda:0" = _foreach_add_1[24]
    getitem_6977: "f32[204][1]cuda:0" = _foreach_add_1[25]
    getitem_6978: "f32[204][1]cuda:0" = _foreach_add_1[26]
    getitem_6979: "f32[64][1]cuda:0" = _foreach_add_1[27]
    getitem_6980: "f32[204][1]cuda:0" = _foreach_add_1[28]
    getitem_6981: "f32[64, 204][204, 1]cuda:0" = _foreach_add_1[29]
    getitem_6982: "f32[204][1]cuda:0" = _foreach_add_1[30]
    getitem_6983: "f32[204, 72][72, 1]cuda:0" = _foreach_add_1[31]
    getitem_6984: "f32[204][1]cuda:0" = _foreach_add_1[32]
    getitem_6985: "f32[64][1]cuda:0" = _foreach_add_1[33]
    getitem_6986: "f32[64, 204][204, 1]cuda:0" = _foreach_add_1[34]
    getitem_6987: "f32[768, 2675][2675, 1]cuda:0" = _foreach_add_1[35]
    getitem_6988: "f32[768, 2048][2048, 1]cuda:0" = _foreach_add_1[36]
    getitem_6989: "f32[768][1]cuda:0" = _foreach_add_1[37]
    getitem_6990: "f32[4096][1]cuda:0" = _foreach_add_1[38]
    getitem_6991: "f32[4096, 256][256, 1]cuda:0" = _foreach_add_1[39]
    getitem_6992: "f32[64][1]cuda:0" = _foreach_add_1[40]
    getitem_6993: "f32[2675][1]cuda:0" = _foreach_add_1[41]
    getitem_6994: "f32[1536, 4096][4096, 1]cuda:0" = _foreach_add_1[42]
    getitem_6995: "f32[4096][1]cuda:0" = _foreach_add_1[43]
    getitem_6996: "f32[1840][1]cuda:0" = _foreach_add_1[44]
    getitem_6997: "f32[2048, 2675][2675, 1]cuda:0" = _foreach_add_1[45]
    getitem_6998: "f32[2048][1]cuda:0" = _foreach_add_1[46]
    getitem_6999: "f32[2048][1]cuda:0" = _foreach_add_1[47]
    getitem_7000: "f32[768][1]cuda:0" = _foreach_add_1[48]
    getitem_7001: "f32[256][1]cuda:0" = _foreach_add_1[49]
    getitem_7002: "f32[768, 2048][2048, 1]cuda:0" = _foreach_add_1[50]
    getitem_7003: "f32[4096][1]cuda:0" = _foreach_add_1[51]
    getitem_7004: "f32[104][1]cuda:0" = _foreach_add_1[52]
    getitem_7005: "f32[768][1]cuda:0" = _foreach_add_1[53]
    getitem_7006: "f32[1024][1]cuda:0" = _foreach_add_1[54]
    getitem_7007: "f32[2048][1]cuda:0" = _foreach_add_1[55]
    getitem_7008: "f32[768, 2675][2675, 1]cuda:0" = _foreach_add_1[56]
    getitem_7009: "f32[2675][1]cuda:0" = _foreach_add_1[57]
    getitem_7010: "f32[256][1]cuda:0" = _foreach_add_1[58]
    getitem_7011: "f32[768][1]cuda:0" = _foreach_add_1[59]
    getitem_7012: "f32[256, 768][768, 1]cuda:0" = _foreach_add_1[60]
    getitem_7013: "f32[64][1]cuda:0" = _foreach_add_1[61]
    getitem_7014: "f32[1536][1]cuda:0" = _foreach_add_1[62]
    getitem_7015: "f32[2048][1]cuda:0" = _foreach_add_1[63]
    getitem_7016: "f32[3360][1]cuda:0" = _foreach_add_1[64]
    getitem_7017: "f32[768][1]cuda:0" = _foreach_add_1[65]
    getitem_7018: "f32[768, 2048][2048, 1]cuda:0" = _foreach_add_1[66]
    getitem_7019: "f32[256][1]cuda:0" = _foreach_add_1[67]
    getitem_7020: "f32[104, 256][256, 1]cuda:0" = _foreach_add_1[68]
    getitem_7021: "f32[2675][1]cuda:0" = _foreach_add_1[69]
    getitem_7022: "f32[768][1]cuda:0" = _foreach_add_1[70]
    getitem_7023: "f32[2048][1]cuda:0" = _foreach_add_1[71]
    getitem_7024: "f32[1024][1]cuda:0" = _foreach_add_1[72]
    getitem_7025: "f32[64, 612][612, 1]cuda:0" = _foreach_add_1[73]
    getitem_7026: "f32[128][1]cuda:0" = _foreach_add_1[74]
    getitem_7027: "f32[308, 256][256, 1]cuda:0" = _foreach_add_1[75]
    getitem_7028: "f32[1][1]cuda:0" = _foreach_add_1[76]
    getitem_7029: "f32[512][1]cuda:0" = _foreach_add_1[77]
    getitem_7030: "f32[512][1]cuda:0" = _foreach_add_1[78]
    _foreach_add_1 = None

    # File: <torch_package_0>.caffe2/torch/fb/optim/shampoo_wrapper.py:356 in torch_dynamo_resume_in__per_group_step_impl_at_316, code: torch._foreach_mul_(masked_blocked_search_directions, adjusted_lr)  # pyre-ignore [6]
    _foreach_mul_2 = torch.ops.aten._foreach_mul.List(
        [
            getitem_6952,
            getitem_6953,
            getitem_6954,
            getitem_6955,
            getitem_6956,
            getitem_6957,
            getitem_6958,
            getitem_6959,
            getitem_6960,
            getitem_6961,
            getitem_6962,
            getitem_6963,
            getitem_6964,
            getitem_6965,
            getitem_6966,
            getitem_6967,
            getitem_6968,
            getitem_6969,
            getitem_6970,
            getitem_6971,
            getitem_6972,
            getitem_6973,
            getitem_6974,
            getitem_6975,
            getitem_6976,
            getitem_6977,
            getitem_6978,
            getitem_6979,
            getitem_6980,
            getitem_6981,
            getitem_6982,
            getitem_6983,
            getitem_6984,
            getitem_6985,
            getitem_6986,
            getitem_6987,
            getitem_6988,
            getitem_6989,
            getitem_6990,
            getitem_6991,
            getitem_6992,
            getitem_6993,
            getitem_6994,
            getitem_6995,
            getitem_6996,
            getitem_6997,
            getitem_6998,
            getitem_6999,
            getitem_7000,
            getitem_7001,
            getitem_7002,
            getitem_7003,
            getitem_7004,
            getitem_7005,
            getitem_7006,
            getitem_7007,
            getitem_7008,
            getitem_7009,
            getitem_7010,
            getitem_7011,
            getitem_7012,
            getitem_7013,
            getitem_7014,
            getitem_7015,
            getitem_7016,
            getitem_7017,
            getitem_7018,
            getitem_7019,
            getitem_7020,
            getitem_7021,
            getitem_7022,
            getitem_7023,
            getitem_7024,
            getitem_7025,
            getitem_7026,
            getitem_7027,
            getitem_7028,
            getitem_7029,
            getitem_7030,
        ],
        [
            getitem_711,
            getitem_791,
            getitem_871,
            getitem_951,
            getitem_1031,
            getitem_1111,
            getitem_1191,
            getitem_1271,
            getitem_1351,
            getitem_1431,
            getitem_1511,
            getitem_1591,
            getitem_1671,
            getitem_1751,
            getitem_1831,
            getitem_1911,
            getitem_1991,
            getitem_2071,
            getitem_2151,
            getitem_2231,
            getitem_2311,
            getitem_2391,
            getitem_2471,
            getitem_2551,
            getitem_2631,
            getitem_2711,
            getitem_2791,
            getitem_2871,
            getitem_2951,
            getitem_3031,
            getitem_3111,
            getitem_3191,
            getitem_3271,
            getitem_3351,
            getitem_3431,
            getitem_3511,
            getitem_3591,
            getitem_3671,
            getitem_3751,
            getitem_3831,
            getitem_3911,
            getitem_3991,
            getitem_4071,
            getitem_4151,
            getitem_4231,
            getitem_4311,
            getitem_4391,
            getitem_4471,
            getitem_4551,
            getitem_4631,
            getitem_4711,
            getitem_4791,
            getitem_4871,
            getitem_4951,
            getitem_5031,
            getitem_5111,
            getitem_5191,
            getitem_5271,
            getitem_5351,
            getitem_5431,
            getitem_5511,
            getitem_5591,
            getitem_5671,
            getitem_5751,
            getitem_5831,
            getitem_5911,
            getitem_5991,
            getitem_6071,
            getitem_6151,
            getitem_6231,
            getitem_6311,
            getitem_6391,
            getitem_6471,
            getitem_6551,
            getitem_6631,
            getitem_6711,
            getitem_6791,
            getitem_6871,
            getitem_6951,
        ],
    )
    getitem_6952 = (
        getitem_6953
    ) = (
        getitem_6954
    ) = (
        getitem_6955
    ) = (
        getitem_6956
    ) = (
        getitem_6957
    ) = (
        getitem_6958
    ) = (
        getitem_6959
    ) = (
        getitem_6960
    ) = (
        getitem_6961
    ) = (
        getitem_6962
    ) = (
        getitem_6963
    ) = (
        getitem_6964
    ) = (
        getitem_6965
    ) = (
        getitem_6966
    ) = (
        getitem_6967
    ) = (
        getitem_6968
    ) = (
        getitem_6969
    ) = (
        getitem_6970
    ) = (
        getitem_6971
    ) = (
        getitem_6972
    ) = (
        getitem_6973
    ) = (
        getitem_6974
    ) = (
        getitem_6975
    ) = (
        getitem_6976
    ) = (
        getitem_6977
    ) = (
        getitem_6978
    ) = (
        getitem_6979
    ) = (
        getitem_6980
    ) = (
        getitem_6981
    ) = (
        getitem_6982
    ) = (
        getitem_6983
    ) = (
        getitem_6984
    ) = (
        getitem_6985
    ) = (
        getitem_6986
    ) = (
        getitem_6987
    ) = (
        getitem_6988
    ) = (
        getitem_6989
    ) = (
        getitem_6990
    ) = (
        getitem_6991
    ) = (
        getitem_6992
    ) = (
        getitem_6993
    ) = (
        getitem_6994
    ) = (
        getitem_6995
    ) = (
        getitem_6996
    ) = (
        getitem_6997
    ) = (
        getitem_6998
    ) = (
        getitem_6999
    ) = (
        getitem_7000
    ) = (
        getitem_7001
    ) = (
        getitem_7002
    ) = (
        getitem_7003
    ) = (
        getitem_7004
    ) = (
        getitem_7005
    ) = (
        getitem_7006
    ) = (
        getitem_7007
    ) = (
        getitem_7008
    ) = (
        getitem_7009
    ) = (
        getitem_7010
    ) = (
        getitem_7011
    ) = (
        getitem_7012
    ) = (
        getitem_7013
    ) = (
        getitem_7014
    ) = (
        getitem_7015
    ) = (
        getitem_7016
    ) = (
        getitem_7017
    ) = (
        getitem_7018
    ) = (
        getitem_7019
    ) = (
        getitem_7020
    ) = (
        getitem_7021
    ) = (
        getitem_7022
    ) = (
        getitem_7023
    ) = (
        getitem_7024
    ) = (
        getitem_7025
    ) = (
        getitem_7026
    ) = (
        getitem_7027
    ) = (
        getitem_7028
    ) = (
        getitem_7029
    ) = (
        getitem_7030
    ) = (
        getitem_711
    ) = (
        getitem_791
    ) = (
        getitem_871
    ) = (
        getitem_951
    ) = (
        getitem_1031
    ) = (
        getitem_1111
    ) = (
        getitem_1191
    ) = (
        getitem_1271
    ) = (
        getitem_1351
    ) = (
        getitem_1431
    ) = (
        getitem_1511
    ) = (
        getitem_1591
    ) = (
        getitem_1671
    ) = (
        getitem_1751
    ) = (
        getitem_1831
    ) = (
        getitem_1911
    ) = (
        getitem_1991
    ) = (
        getitem_2071
    ) = (
        getitem_2151
    ) = (
        getitem_2231
    ) = (
        getitem_2311
    ) = (
        getitem_2391
    ) = (
        getitem_2471
    ) = (
        getitem_2551
    ) = (
        getitem_2631
    ) = (
        getitem_2711
    ) = (
        getitem_2791
    ) = (
        getitem_2871
    ) = (
        getitem_2951
    ) = (
        getitem_3031
    ) = (
        getitem_3111
    ) = (
        getitem_3191
    ) = (
        getitem_3271
    ) = (
        getitem_3351
    ) = (
        getitem_3431
    ) = (
        getitem_3511
    ) = (
        getitem_3591
    ) = (
        getitem_3671
    ) = (
        getitem_3751
    ) = (
        getitem_3831
    ) = (
        getitem_3911
    ) = (
        getitem_3991
    ) = (
        getitem_4071
    ) = (
        getitem_4151
    ) = (
        getitem_4231
    ) = (
        getitem_4311
    ) = (
        getitem_4391
    ) = (
        getitem_4471
    ) = (
        getitem_4551
    ) = (
        getitem_4631
    ) = (
        getitem_4711
    ) = (
        getitem_4791
    ) = (
        getitem_4871
    ) = (
        getitem_4951
    ) = (
        getitem_5031
    ) = (
        getitem_5111
    ) = (
        getitem_5191
    ) = (
        getitem_5271
    ) = (
        getitem_5351
    ) = (
        getitem_5431
    ) = (
        getitem_5511
    ) = (
        getitem_5591
    ) = (
        getitem_5671
    ) = (
        getitem_5751
    ) = (
        getitem_5831
    ) = (
        getitem_5911
    ) = (
        getitem_5991
    ) = (
        getitem_6071
    ) = (
        getitem_6151
    ) = (
        getitem_6231
    ) = (
        getitem_6311
    ) = (
        getitem_6391
    ) = (
        getitem_6471
    ) = (
        getitem_6551
    ) = getitem_6631 = getitem_6711 = getitem_6791 = getitem_6871 = getitem_6951 = None
    getitem_7031: "f32[50][1]cuda:0" = _foreach_mul_2[0]
    getitem_7032: "f32[23][1]cuda:0" = _foreach_mul_2[1]
    getitem_7033: "f32[38][1]cuda:0" = _foreach_mul_2[2]
    getitem_7034: "f32[5][1]cuda:0" = _foreach_mul_2[3]
    getitem_7035: "f32[100][1]cuda:0" = _foreach_mul_2[4]
    getitem_7036: "f32[50][1]cuda:0" = _foreach_mul_2[5]
    getitem_7037: "f32[77][1]cuda:0" = _foreach_mul_2[6]
    getitem_7038: "f32[100][1]cuda:0" = _foreach_mul_2[7]
    getitem_7039: "f32[100][1]cuda:0" = _foreach_mul_2[8]
    getitem_7040: "f32[96][1]cuda:0" = _foreach_mul_2[9]
    getitem_7041: "f32[78][1]cuda:0" = _foreach_mul_2[10]
    getitem_7042: "f32[100][1]cuda:0" = _foreach_mul_2[11]
    getitem_7043: "f32[100][1]cuda:0" = _foreach_mul_2[12]
    getitem_7044: "f32[97][1]cuda:0" = _foreach_mul_2[13]
    getitem_7045: "f32[819, 732][732, 1]cuda:0" = _foreach_mul_2[14]
    getitem_7046: "f32[204][1]cuda:0" = _foreach_mul_2[15]
    getitem_7047: "f32[64][1]cuda:0" = _foreach_mul_2[16]
    getitem_7048: "f32[204][1]cuda:0" = _foreach_mul_2[17]
    getitem_7049: "f32[64, 204][204, 1]cuda:0" = _foreach_mul_2[18]
    getitem_7050: "f32[204][1]cuda:0" = _foreach_mul_2[19]
    getitem_7051: "f32[204, 160][160, 1]cuda:0" = _foreach_mul_2[20]
    getitem_7052: "f32[204][1]cuda:0" = _foreach_mul_2[21]
    getitem_7053: "f32[64][1]cuda:0" = _foreach_mul_2[22]
    getitem_7054: "f32[204][1]cuda:0" = _foreach_mul_2[23]
    getitem_7055: "f32[64, 204][204, 1]cuda:0" = _foreach_mul_2[24]
    getitem_7056: "f32[204][1]cuda:0" = _foreach_mul_2[25]
    getitem_7057: "f32[204][1]cuda:0" = _foreach_mul_2[26]
    getitem_7058: "f32[64][1]cuda:0" = _foreach_mul_2[27]
    getitem_7059: "f32[204][1]cuda:0" = _foreach_mul_2[28]
    getitem_7060: "f32[64, 204][204, 1]cuda:0" = _foreach_mul_2[29]
    getitem_7061: "f32[204][1]cuda:0" = _foreach_mul_2[30]
    getitem_7062: "f32[204, 72][72, 1]cuda:0" = _foreach_mul_2[31]
    getitem_7063: "f32[204][1]cuda:0" = _foreach_mul_2[32]
    getitem_7064: "f32[64][1]cuda:0" = _foreach_mul_2[33]
    getitem_7065: "f32[64, 204][204, 1]cuda:0" = _foreach_mul_2[34]
    getitem_7066: "f32[768, 2675][2675, 1]cuda:0" = _foreach_mul_2[35]
    getitem_7067: "f32[768, 2048][2048, 1]cuda:0" = _foreach_mul_2[36]
    getitem_7068: "f32[768][1]cuda:0" = _foreach_mul_2[37]
    getitem_7069: "f32[4096][1]cuda:0" = _foreach_mul_2[38]
    getitem_7070: "f32[4096, 256][256, 1]cuda:0" = _foreach_mul_2[39]
    getitem_7071: "f32[64][1]cuda:0" = _foreach_mul_2[40]
    getitem_7072: "f32[2675][1]cuda:0" = _foreach_mul_2[41]
    getitem_7073: "f32[1536, 4096][4096, 1]cuda:0" = _foreach_mul_2[42]
    getitem_7074: "f32[4096][1]cuda:0" = _foreach_mul_2[43]
    getitem_7075: "f32[1840][1]cuda:0" = _foreach_mul_2[44]
    getitem_7076: "f32[2048, 2675][2675, 1]cuda:0" = _foreach_mul_2[45]
    getitem_7077: "f32[2048][1]cuda:0" = _foreach_mul_2[46]
    getitem_7078: "f32[2048][1]cuda:0" = _foreach_mul_2[47]
    getitem_7079: "f32[768][1]cuda:0" = _foreach_mul_2[48]
    getitem_7080: "f32[256][1]cuda:0" = _foreach_mul_2[49]
    getitem_7081: "f32[768, 2048][2048, 1]cuda:0" = _foreach_mul_2[50]
    getitem_7082: "f32[4096][1]cuda:0" = _foreach_mul_2[51]
    getitem_7083: "f32[104][1]cuda:0" = _foreach_mul_2[52]
    getitem_7084: "f32[768][1]cuda:0" = _foreach_mul_2[53]
    getitem_7085: "f32[1024][1]cuda:0" = _foreach_mul_2[54]
    getitem_7086: "f32[2048][1]cuda:0" = _foreach_mul_2[55]
    getitem_7087: "f32[768, 2675][2675, 1]cuda:0" = _foreach_mul_2[56]
    getitem_7088: "f32[2675][1]cuda:0" = _foreach_mul_2[57]
    getitem_7089: "f32[256][1]cuda:0" = _foreach_mul_2[58]
    getitem_7090: "f32[768][1]cuda:0" = _foreach_mul_2[59]
    getitem_7091: "f32[256, 768][768, 1]cuda:0" = _foreach_mul_2[60]
    getitem_7092: "f32[64][1]cuda:0" = _foreach_mul_2[61]
    getitem_7093: "f32[1536][1]cuda:0" = _foreach_mul_2[62]
    getitem_7094: "f32[2048][1]cuda:0" = _foreach_mul_2[63]
    getitem_7095: "f32[3360][1]cuda:0" = _foreach_mul_2[64]
    getitem_7096: "f32[768][1]cuda:0" = _foreach_mul_2[65]
    getitem_7097: "f32[768, 2048][2048, 1]cuda:0" = _foreach_mul_2[66]
    getitem_7098: "f32[256][1]cuda:0" = _foreach_mul_2[67]
    getitem_7099: "f32[104, 256][256, 1]cuda:0" = _foreach_mul_2[68]
    getitem_7100: "f32[2675][1]cuda:0" = _foreach_mul_2[69]
    getitem_7101: "f32[768][1]cuda:0" = _foreach_mul_2[70]
    getitem_7102: "f32[2048][1]cuda:0" = _foreach_mul_2[71]
    getitem_7103: "f32[1024][1]cuda:0" = _foreach_mul_2[72]
    getitem_7104: "f32[64, 612][612, 1]cuda:0" = _foreach_mul_2[73]
    getitem_7105: "f32[128][1]cuda:0" = _foreach_mul_2[74]
    getitem_7106: "f32[308, 256][256, 1]cuda:0" = _foreach_mul_2[75]
    getitem_7107: "f32[1][1]cuda:0" = _foreach_mul_2[76]
    getitem_7108: "f32[512][1]cuda:0" = _foreach_mul_2[77]
    getitem_7109: "f32[512][1]cuda:0" = _foreach_mul_2[78]
    _foreach_mul_2 = None
    copy_: "f32[50][1]cuda:0" = torch.ops.aten.copy_.default(arg1_1, getitem_7031)
    arg1_1 = getitem_7031 = None  #
    copy__1: "f32[23][1]cuda:0" = torch.ops.aten.copy_.default(arg2_1, getitem_7032)
    arg2_1 = getitem_7032 = None  #
    copy__2: "f32[38][1]cuda:0" = torch.ops.aten.copy_.default(arg3_1, getitem_7033)
    arg3_1 = getitem_7033 = None  #
    copy__3: "f32[5][1]cuda:0" = torch.ops.aten.copy_.default(arg4_1, getitem_7034)
    arg4_1 = getitem_7034 = None  #
    copy__4: "f32[100][1]cuda:0" = torch.ops.aten.copy_.default(arg5_1, getitem_7035)
    arg5_1 = getitem_7035 = None  #
    copy__5: "f32[50][1]cuda:0" = torch.ops.aten.copy_.default(arg6_1, getitem_7036)
    arg6_1 = getitem_7036 = None  #
    copy__6: "f32[77][1]cuda:0" = torch.ops.aten.copy_.default(arg7_1, getitem_7037)
    arg7_1 = getitem_7037 = None  #
    copy__7: "f32[100][1]cuda:0" = torch.ops.aten.copy_.default(arg8_1, getitem_7038)
    arg8_1 = getitem_7038 = None  #
    copy__8: "f32[100][1]cuda:0" = torch.ops.aten.copy_.default(arg9_1, getitem_7039)
    arg9_1 = getitem_7039 = None  #
    copy__9: "f32[96][1]cuda:0" = torch.ops.aten.copy_.default(arg10_1, getitem_7040)
    arg10_1 = getitem_7040 = None  #
    copy__10: "f32[78][1]cuda:0" = torch.ops.aten.copy_.default(arg11_1, getitem_7041)
    arg11_1 = getitem_7041 = None
    copy__11: "f32[100][1]cuda:0" = torch.ops.aten.copy_.default(arg12_1, getitem_7042)
    arg12_1 = getitem_7042 = None
    copy__12: "f32[100][1]cuda:0" = torch.ops.aten.copy_.default(arg13_1, getitem_7043)
    arg13_1 = getitem_7043 = None
    copy__13: "f32[97][1]cuda:0" = torch.ops.aten.copy_.default(arg14_1, getitem_7044)
    arg14_1 = getitem_7044 = None
    copy__14: "f32[819, 732][732, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg15_1, getitem_7045
    )
    arg15_1 = getitem_7045 = None
    copy__15: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg16_1, getitem_7046)
    arg16_1 = getitem_7046 = None
    copy__16: "f32[64][1]cuda:0" = torch.ops.aten.copy_.default(arg17_1, getitem_7047)
    arg17_1 = getitem_7047 = None
    copy__17: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg18_1, getitem_7048)
    arg18_1 = getitem_7048 = None
    copy__18: "f32[64, 204][204, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg19_1, getitem_7049
    )
    arg19_1 = getitem_7049 = None
    copy__19: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg20_1, getitem_7050)
    arg20_1 = getitem_7050 = None
    copy__20: "f32[204, 160][160, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg21_1, getitem_7051
    )
    arg21_1 = getitem_7051 = None
    copy__21: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg22_1, getitem_7052)
    arg22_1 = getitem_7052 = None
    # Write slot 22 of _foreach_mul_2 back into arg23_1, like every neighbouring
    # argN_1; without this copy getitem_7053 is extracted but never used.
    copy__22: "f32[64][1]cuda:0" = torch.ops.aten.copy_.default(arg23_1, getitem_7053)
    arg23_1 = getitem_7053 = None
    copy__23: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg24_1, getitem_7054)
    arg24_1 = getitem_7054 = None
    copy__24: "f32[64, 204][204, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg25_1, getitem_7055
    )
    arg25_1 = getitem_7055 = None
    copy__25: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg26_1, getitem_7056)
    arg26_1 = getitem_7056 = None
    copy__26: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg27_1, getitem_7057)
    arg27_1 = getitem_7057 = None
    copy__27: "f32[64][1]cuda:0" = torch.ops.aten.copy_.default(arg28_1, getitem_7058)
    arg28_1 = getitem_7058 = None
    copy__28: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg29_1, getitem_7059)
    arg29_1 = getitem_7059 = None
    copy__29: "f32[64, 204][204, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg30_1, getitem_7060
    )
    arg30_1 = getitem_7060 = None
    copy__30: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg31_1, getitem_7061)
    arg31_1 = getitem_7061 = None
    copy__31: "f32[204, 72][72, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg32_1, getitem_7062
    )
    arg32_1 = getitem_7062 = None
    copy__32: "f32[204][1]cuda:0" = torch.ops.aten.copy_.default(arg33_1, getitem_7063)
    arg33_1 = getitem_7063 = None
    copy__33: "f32[64][1]cuda:0" = torch.ops.aten.copy_.default(arg34_1, getitem_7064)
    arg34_1 = getitem_7064 = None
    copy__34: "f32[64, 204][204, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg35_1, getitem_7065
    )
    arg35_1 = getitem_7065 = None
    copy__35: "f32[768, 2675][2675, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg36_1, getitem_7066
    )
    arg36_1 = getitem_7066 = None
    copy__36: "f32[768, 2048][2048, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg37_1, getitem_7067
    )
    arg37_1 = getitem_7067 = None
    copy__37: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg38_1, getitem_7068)
    arg38_1 = getitem_7068 = None
    copy__38: "f32[4096][1]cuda:0" = torch.ops.aten.copy_.default(arg39_1, getitem_7069)
    arg39_1 = getitem_7069 = None
    copy__39: "f32[4096, 256][256, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg40_1, getitem_7070
    )
    arg40_1 = getitem_7070 = None
    copy__40: "f32[64][1]cuda:0" = torch.ops.aten.copy_.default(arg41_1, getitem_7071)
    arg41_1 = getitem_7071 = None
    copy__41: "f32[2675][1]cuda:0" = torch.ops.aten.copy_.default(arg42_1, getitem_7072)
    arg42_1 = getitem_7072 = None
    copy__42: "f32[1536, 4096][4096, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg43_1, getitem_7073
    )
    arg43_1 = getitem_7073 = None
    copy__43: "f32[4096][1]cuda:0" = torch.ops.aten.copy_.default(arg44_1, getitem_7074)
    arg44_1 = getitem_7074 = None
    copy__44: "f32[1840][1]cuda:0" = torch.ops.aten.copy_.default(arg45_1, getitem_7075)
    arg45_1 = getitem_7075 = None
    copy__45: "f32[2048, 2675][2675, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg46_1, getitem_7076
    )
    arg46_1 = getitem_7076 = None
    copy__46: "f32[2048][1]cuda:0" = torch.ops.aten.copy_.default(arg47_1, getitem_7077)
    arg47_1 = getitem_7077 = None
    copy__47: "f32[2048][1]cuda:0" = torch.ops.aten.copy_.default(arg48_1, getitem_7078)
    arg48_1 = getitem_7078 = None
    copy__48: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg49_1, getitem_7079)
    arg49_1 = getitem_7079 = None
    # Write slot 49 of _foreach_mul_2 back into arg50_1, like every neighbouring
    # argN_1; without this copy getitem_7080 is extracted but never used.
    copy__49: "f32[256][1]cuda:0" = torch.ops.aten.copy_.default(arg50_1, getitem_7080)
    arg50_1 = getitem_7080 = None
    copy__50: "f32[768, 2048][2048, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg51_1, getitem_7081
    )
    arg51_1 = getitem_7081 = None
    copy__51: "f32[4096][1]cuda:0" = torch.ops.aten.copy_.default(arg52_1, getitem_7082)
    arg52_1 = getitem_7082 = None
    copy__52: "f32[104][1]cuda:0" = torch.ops.aten.copy_.default(arg53_1, getitem_7083)
    arg53_1 = getitem_7083 = None
    copy__53: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg54_1, getitem_7084)
    arg54_1 = getitem_7084 = None
    copy__54: "f32[1024][1]cuda:0" = torch.ops.aten.copy_.default(arg55_1, getitem_7085)
    arg55_1 = getitem_7085 = None
    copy__55: "f32[2048][1]cuda:0" = torch.ops.aten.copy_.default(arg56_1, getitem_7086)
    arg56_1 = getitem_7086 = None
    copy__56: "f32[768, 2675][2675, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg57_1, getitem_7087
    )
    arg57_1 = getitem_7087 = None
    copy__57: "f32[2675][1]cuda:0" = torch.ops.aten.copy_.default(arg58_1, getitem_7088)
    arg58_1 = getitem_7088 = None
    copy__58: "f32[256][1]cuda:0" = torch.ops.aten.copy_.default(arg59_1, getitem_7089)
    arg59_1 = getitem_7089 = None
    copy__59: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg60_1, getitem_7090)
    arg60_1 = getitem_7090 = None
    copy__60: "f32[256, 768][768, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg61_1, getitem_7091
    )
    arg61_1 = getitem_7091 = None
    copy__61: "f32[64][1]cuda:0" = torch.ops.aten.copy_.default(arg62_1, getitem_7092)
    arg62_1 = getitem_7092 = None
    copy__62: "f32[1536][1]cuda:0" = torch.ops.aten.copy_.default(arg63_1, getitem_7093)
    arg63_1 = getitem_7093 = None
    copy__63: "f32[2048][1]cuda:0" = torch.ops.aten.copy_.default(arg64_1, getitem_7094)
    arg64_1 = getitem_7094 = None
    copy__64: "f32[3360][1]cuda:0" = torch.ops.aten.copy_.default(arg65_1, getitem_7095)
    arg65_1 = getitem_7095 = None
    copy__65: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg66_1, getitem_7096)
    arg66_1 = getitem_7096 = None
    copy__66: "f32[768, 2048][2048, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg67_1, getitem_7097
    )
    arg67_1 = getitem_7097 = None
    copy__67: "f32[256][1]cuda:0" = torch.ops.aten.copy_.default(arg68_1, getitem_7098)
    arg68_1 = getitem_7098 = None
    copy__68: "f32[104, 256][256, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg69_1, getitem_7099
    )
    arg69_1 = getitem_7099 = None
    copy__69: "f32[2675][1]cuda:0" = torch.ops.aten.copy_.default(arg70_1, getitem_7100)
    arg70_1 = getitem_7100 = None
    copy__70: "f32[768][1]cuda:0" = torch.ops.aten.copy_.default(arg71_1, getitem_7101)
    arg71_1 = getitem_7101 = None
    copy__71: "f32[2048][1]cuda:0" = torch.ops.aten.copy_.default(arg72_1, getitem_7102)
    arg72_1 = getitem_7102 = None
    copy__72: "f32[1024][1]cuda:0" = torch.ops.aten.copy_.default(arg73_1, getitem_7103)
    arg73_1 = getitem_7103 = None
    copy__73: "f32[64, 612][612, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg74_1, getitem_7104
    )
    arg74_1 = getitem_7104 = None
    copy__74: "f32[128][1]cuda:0" = torch.ops.aten.copy_.default(arg75_1, getitem_7105)
    arg75_1 = getitem_7105 = None
    copy__75: "f32[308, 256][256, 1]cuda:0" = torch.ops.aten.copy_.default(
        arg76_1, getitem_7106
    )
    arg76_1 = getitem_7106 = None
    copy__76: "f32[1][1]cuda:0" = torch.ops.aten.copy_.default(arg77_1, getitem_7107)
    arg77_1 = getitem_7107 = None
    copy__77: "f32[512][1]cuda:0" = torch.ops.aten.copy_.default(arg78_1, getitem_7108)
    arg78_1 = getitem_7108 = None
    copy__78: "f32[512][1]cuda:0" = torch.ops.aten.copy_.default(arg79_1, getitem_7109)
    arg79_1 = getitem_7109 = None
    return ()
