wifi-densepose/benchmarks/wiflow-std/results/edge_optimization.json

{
  "torch": {
    "env": {
      "torch": "2.12.0+cpu",
      "platform": "Windows-11-10.0.26200-SP0",
      "processor": "Intel64 Family 6 Model 197 Stepping 2, GenuineIntel",
      "num_threads": 16,
      "checkpoint": "results\\retrained_best_pose_model.pth",
      "params": 2225042
    },
    "variants": {
      "fp32": {
        "file": "retrained_fp32_resaved.pth",
        "size_bytes": 9068948,
        "size_mb": 9.068948,
        "latency_batch1": {
          "batch_size": 1,
          "runs": 100,
          "median_ms_per_batch": 24.903650000851485,
          "median_ms_per_window": 24.903650000851485,
          "windows_per_second": 40.15475642991324
        },
        "latency_batch64": {
          "batch_size": 64,
          "runs": 30,
          "median_ms_per_batch": 184.02919999789447,
          "median_ms_per_window": 2.875456249967101,
          "windows_per_second": 347.77089723115813
        },
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9668200004577636,
          "pck@50": 0.9915333324432373,
          "mpjpe": 0.00936222033649683,
          "wall_seconds": 37.85407733917236
        }
      },
      "fp16": {
        "file": "retrained_fp16.pth",
        "size_bytes": 4580332,
        "size_mb": 4.580332,
        "latency_batch1": {
          "batch_size": 1,
          "runs": 100,
          "median_ms_per_batch": 23.936699999467237,
          "median_ms_per_window": 23.936699999467237,
          "windows_per_second": 41.776853117691964
        },
        "latency_batch64": {
          "batch_size": 64,
          "runs": 30,
          "median_ms_per_batch": 102.32584999903338,
          "median_ms_per_window": 1.5988414062348966,
          "windows_per_second": 625.4529036465817
        },
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.966773332977295,
          "pck@50": 0.9915066654205322,
          "mpjpe": 0.009460017587244511,
          "wall_seconds": 21.632277250289917
        }
      },
      "int8_dynamic": {
        "file": "retrained_int8_dynamic.pth",
        "size_bytes": 9068948,
        "size_mb": 9.068948,
        "latency_batch1": {
          "batch_size": 1,
          "runs": 100,
          "median_ms_per_batch": 18.105350000041653,
          "median_ms_per_window": 18.105350000041653,
          "windows_per_second": 55.23229321707117
        },
        "latency_batch64": {
          "batch_size": 64,
          "runs": 30,
          "median_ms_per_batch": 168.77549999844632,
          "median_ms_per_window": 2.6371171874757238,
          "windows_per_second": 379.20195763359703
        },
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9668200004577636,
          "pck@50": 0.9915333324432373,
          "mpjpe": 0.00936222033649683,
          "wall_seconds": 45.35376596450806
        }
      }
    },
    "int8_dynamic_quant_report": {
      "eligible_module_counts": {
        "nn.Linear": 0,
        "nn.Conv1d": 21,
        "nn.Conv2d": 22
      },
      "modules_actually_quantized": [],
      "n_modules_quantized": 0,
      "params_total": 2225042,
      "params_quantized": 0,
      "params_quantized_fraction": 0.0
    },
    "accuracy_subset": {
      "description": "seed-42 file-level 70/15/15 test split, corrupted windows (files 487-499) excluded, seed-42 random subset",
      "subset_size": 10000,
      "clean_test_total": 10000
    }
  },
  "onnx": {
    "env": {
      "torch": "2.12.0+cpu",
      "onnxruntime": "1.26.0",
      "platform": "Windows-11-10.0.26200-SP0"
    },
    "export": {
      "mode": "dynamic-batch",
      "exporter": "torchscript",
      "file": "retrained_fp32_dynamic.onnx",
      "size_mb": 8.971781
    },
    "parity": {
      "fixture": "results/parity_fixture.npz (batch 2, seed 42)",
      "max_abs_diff_vs_stored_fixture": 2.384185791015625e-07,
      "max_abs_diff_vs_torch_now": 2.384185791015625e-07,
      "pass_lt_1e-4": true
    },
    "latency": {
      "batch1": {
        "batch_size": 1,
        "runs": 100,
        "median_ms_per_batch": 2.5410999987798277,
        "median_ms_per_window": 2.5410999987798277,
        "windows_per_second": 393.5303610563043
      },
      "batch64": {
        "batch_size": 64,
        "runs": 30,
        "median_ms_per_batch": 181.95204999938142,
        "median_ms_per_window": 2.8430007812403346,
        "windows_per_second": 351.7410218803118
      }
    },
    "ort_int8_dynamic_supplementary": {
      "file": "retrained_int8_ort_dynamic.onnx",
      "size_mb": 2.438794,
      "runs": true,
      "max_abs_diff_vs_fp32_fixture": 0.00827130675315857
    }
  },
  "onnx_accuracy": {
    "onnx_fp32": {
      "samples": 10000,
      "pck@20": 0.9668200004577636,
      "pck@50": 0.9915333324432373,
      "mpjpe": 0.00936222568154335,
      "wall_seconds": 22.34790802001953
    },
    "onnx_int8_ort_dynamic": {
      "samples": 10000,
      "pck@20": 0.965240001964569,
      "pck@50": 0.9915466655731201,
      "mpjpe": 0.01108054072111845,
      "wall_seconds": 55.742953062057495
    }
  },
  "latency_controlled_rerun": {
    "note": "3 interleaved repetitions per variant, median ms/window; quiet box",
    "fp32": {
      "batch1_ms_per_window_median": 10.969150001983508,
      "batch1_reps": [
        10.969150001983508,
        12.646450000829645,
        10.49820000116597
      ],
      "batch64_ms_per_window_median": 2.2734187500077496,
      "batch64_reps": [
        2.377234374989712,
        2.124126562478068,
        2.2734187500077496
      ]
    },
    "fp16": {
      "batch1_ms_per_window_median": 24.313550000442774,
      "batch1_reps": [
        25.1078499986761,
        21.856999999727122,
        24.313550000442774
      ],
      "batch64_ms_per_window_median": 2.414695312495496,
      "batch64_reps": [
        2.5705156249955508,
        1.7137437499741281,
        2.414695312495496
      ]
    },
    "int8_dynamic": {
      "batch1_ms_per_window_median": 15.627150000000256,
      "batch1_reps": [
        17.67525000104797,
        14.627999998992891,
        15.627150000000256
      ],
      "batch64_ms_per_window_median": 2.0546906250160646,
      "batch64_reps": [
        2.0546906250160646,
        2.03407343752815,
        2.9325796875241394
      ]
    },
    "onnx_fp32": {
      "batch1_ms_per_window_median": 3.186650001225644,
      "batch1_reps": [
        2.7332500012562377,
        3.1995500012271805,
        3.186650001225644
      ],
      "batch64_ms_per_window_median": 1.9893374999924163,
      "batch64_reps": [
        1.5590843750032946,
        1.9893374999924163,
        2.2144343749914697
      ]
    },
    "onnx_int8_ort_dynamic": {
      "batch1_ms_per_window_median": 6.50984999811044,
      "batch1_reps": [
        6.50984999811044,
        6.455249998907675,
        6.789299999581999
      ],
      "batch64_ms_per_window_median": 5.770093750015803,
      "batch64_reps": [
        5.770093750015803,
        3.912374999970325,
        7.8067296875019565
      ]
    }
  },
  "onnx_static_ptq": {
    "env": {
      "onnxruntime": "1.26.0",
      "torch": "2.12.0+cpu",
      "platform": "Windows-11-10.0.26200-SP0",
      "source_model": "retrained_fp32_dynamic.onnx",
      "preprocessed_model": {
        "file": "retrained_fp32_preproc.onnx",
        "size_mb": 8.981529
      }
    },
    "variants": {
      "minmax_all": {
        "file": "retrained_int8_static_minmax_all.onnx",
        "size_bytes": 2604286,
        "size_mb": 2.604286,
        "calibration": {
          "method": "minmax",
          "windows": 1000,
          "percentile": null,
          "seconds": 5.052440166473389
        },
        "scope": "all",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 283,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 181,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.015945255756378174,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9545266661643982,
          "pck@50": 0.9913666645050049,
          "mpjpe": 0.014860070134699345,
          "wall_seconds": 43.455235958099365
        }
      },
      "minmax_conv": {
        "file": "retrained_int8_static_minmax_conv.onnx",
        "size_bytes": 2527421,
        "size_mb": 2.527421,
        "calibration": {
          "method": "minmax",
          "windows": 1000,
          "percentile": null,
          "seconds": 4.380746126174927
        },
        "scope": "conv",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 156,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 78,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.010693132877349854,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9663399996757507,
          "pck@50": 0.9918666641235352,
          "mpjpe": 0.01084446222037077,
          "wall_seconds": 35.937947034835815
        }
      },
      "entropy_all": {
        "file": "retrained_int8_static_entropy_all.onnx",
        "size_bytes": 2604268,
        "size_mb": 2.604268,
        "calibration": {
          "method": "entropy",
          "windows": 512,
          "percentile": null,
          "seconds": 23.835066318511963
        },
        "scope": "all",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 283,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 181,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.015280365943908691,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9530466662406921,
          "pck@50": 0.9912600006103516,
          "mpjpe": 0.015098519864678382,
          "wall_seconds": 51.514281034469604
        }
      },
      "entropy_conv": {
        "file": "retrained_int8_static_entropy_conv.onnx",
        "size_bytes": 2527403,
        "size_mb": 2.527403,
        "calibration": {
          "method": "entropy",
          "windows": 512,
          "percentile": null,
          "seconds": 9.634419918060303
        },
        "scope": "conv",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 156,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 78,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.012535125017166138,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9659599989891052,
          "pck@50": 0.9918666648864746,
          "mpjpe": 0.010778637571632861,
          "wall_seconds": 41.01180171966553
        }
      },
      "percentile_all": {
        "file": "retrained_int8_static_percentile_all.onnx",
        "size_bytes": 2604052,
        "size_mb": 2.604052,
        "calibration": {
          "method": "percentile",
          "windows": 512,
          "percentile": 99.99,
          "seconds": 20.221954584121704
        },
        "scope": "all",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 283,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 181,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.017689883708953857,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9639333323478698,
          "pck@50": 0.9916799991607667,
          "mpjpe": 0.012176512064039708,
          "wall_seconds": 49.365190744400024
        }
      },
      "percentile_conv": {
        "file": "retrained_int8_static_percentile_conv.onnx",
        "size_bytes": 2527241,
        "size_mb": 2.527241,
        "calibration": {
          "method": "percentile",
          "windows": 512,
          "percentile": 99.99,
          "seconds": 8.223475694656372
        },
        "scope": "conv",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 156,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 78,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.014725983142852783,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9660599988937378,
          "pck@50": 0.9916066654205322,
          "mpjpe": 0.010310938355326652,
          "wall_seconds": 36.89548587799072
        }
      }
    },
    "latency": {
      "note": "3 interleaved repetitions per variant, median ms/window; onnx_fp32 / onnx_int8_ort_dynamic are same-session references",
      "onnx_fp32": {
        "batch1_reps": [
          4.5327999996516155,
          2.535649999117595,
          2.167549997466267
        ],
        "batch64_reps": [
          1.9354515624740998,
          2.4948054687854437,
          1.9334703125082342
        ],
        "batch1_ms_per_window_median": 2.535649999117595,
        "batch64_ms_per_window_median": 1.9354515624740998
      },
      "onnx_int8_ort_dynamic": {
        "batch1_reps": [
          5.698599999959697,
          5.721350000385428,
          4.805099997611251
        ],
        "batch64_reps": [
          4.096601562508795,
          4.857628124995017,
          4.583800000006022
        ],
        "batch1_ms_per_window_median": 5.698599999959697,
        "batch64_ms_per_window_median": 4.583800000006022
      },
      "entropy_all": {
        "batch1_reps": [
          6.444149999879301,
          5.038299999796436,
          5.713200000172947
        ],
        "batch64_reps": [
          4.149468750028973,
          3.437125000004926,
          4.410960937491382
        ],
        "batch1_ms_per_window_median": 5.713200000172947,
        "batch64_ms_per_window_median": 4.149468750028973
      },
      "entropy_conv": {
        "batch1_reps": [
          4.874750000453787,
          5.169099998965976,
          5.236699998931726
        ],
        "batch64_reps": [
          3.010160156236452,
          3.1175546875203963,
          3.516850781238645
        ],
        "batch1_ms_per_window_median": 5.169099998965976,
        "batch64_ms_per_window_median": 3.1175546875203963
      },
      "percentile_all": {
        "batch1_reps": [
          5.184749999898486,
          5.2898499998264015,
          5.916899999647285
        ],
        "batch64_reps": [
          4.305105468745296,
          4.460741406262514,
          4.184502343747454
        ],
        "batch1_ms_per_window_median": 5.2898499998264015,
        "batch64_ms_per_window_median": 4.305105468745296
      },
      "percentile_conv": {
        "batch1_reps": [
          4.916449999655015,
          7.150899999032845,
          5.284949998895172
        ],
        "batch64_reps": [
          3.855813281262499,
          4.688969531230214,
          5.220103124997877
        ],
        "batch1_ms_per_window_median": 5.284949998895172,
        "batch64_ms_per_window_median": 4.688969531230214
      },
      "minmax_all": {
        "batch1_reps": [
          6.463300000177696,
          7.149449998905766,
          5.3209000016067876
        ],
        "batch64_reps": [
          3.9251343750095202,
          4.033442187505898,
          3.428199218745931
        ],
        "batch1_ms_per_window_median": 6.463300000177696,
        "batch64_ms_per_window_median": 3.9251343750095202
      },
      "minmax_conv": {
        "batch1_reps": [
          5.9961499991914025,
          5.236549999608542,
          4.854399998293957
        ],
        "batch64_reps": [
          4.368359375007458,
          3.249617187492504,
          3.0238906249735464
        ],
        "batch1_ms_per_window_median": 5.236549999608542,
        "batch64_ms_per_window_median": 3.249617187492504
      }
    },
    "accuracy_subset": {
      "description": "seed-42 file-level 70/15/15 test split, corrupted windows excluded, seed-42 random subset (same as quantize_bench/eval_ort_accuracy)",
      "subset_size": 10000
    }
  },
  "tiny_variant": {
    "env": {
      "torch": "2.12.0+cpu",
      "onnxruntime": "1.26.0",
      "platform": "Windows-11-10.0.26200-SP0",
      "num_threads": 16,
      "checkpoint": "results\\tiny_best.pth",
      "checkpoint_size_bytes": 340555,
      "params": 56290,
      "variant_config": {
        "tcn": [
          68,
          56,
          44,
          32
        ],
        "conv": [
          2,
          4,
          8,
          16
        ],
        "attn_groups": 2,
        "groups_mode": "depthwise",
        "input_pw_groups": 4
      }
    },
    "export": {
      "mode": "dynamic-batch",
      "exporter": "torchscript",
      "opset": 17,
      "file": "tiny_fp32_dynamic.onnx",
      "size_bytes": 295279,
      "size_mb": 0.295279,
      "verified_batches": [
        1,
        2,
        64
      ],
      "note": "AdaptiveAvgPool2d((15,1)) replaced at export by an exact mean(-1) + constant averaging matmul (final_width 16 is not a multiple of 15, which the TorchScript exporter rejects); exactness proven by the parity check vs the original torch model"
    },
    "parity": {
      "fixture": "results/parity_fixture.npz input (batch 2, seed 42); reference output recomputed with the tiny torch model",
      "max_abs_diff_vs_torch": 1.4901161193847656e-07,
      "pass_lt_1e-4": true
    },
    "int8_static_percentile_conv": {
      "file": "tiny_int8_static_percentile_conv.onnx",
      "size_bytes": 248278,
      "size_mb": 0.248278,
      "calibration": {
        "method": "percentile",
        "percentile": 99.99,
        "windows": 512,
        "scope": "conv-only TRAIN-split corruption-free",
        "seconds": 1.5347836017608643
      },
      "per_channel": true,
      "activation_type": "QInt8",
      "weight_type": "QInt8",
      "max_abs_diff_vs_fp32_fixture": 0.018491357564926147
    },
    "latency": {
      "note": "3 interleaved repetitions per variant, median ms/window; full-model sessions are same-session references",
      "tiny_onnx_fp32": {
        "batch1_reps": [
          0.6312500008789357,
          0.6834500018157996,
          0.6595999984710943
        ],
        "batch64_reps": [
          0.37747578119251557,
          0.24196640623586063,
          0.2314671875183194
        ],
        "batch1_ms_per_window_median": 0.6595999984710943,
        "batch64_ms_per_window_median": 0.24196640623586063
      },
      "tiny_onnx_int8_static_percentile_conv": {
        "batch1_reps": [
          0.7988500001374632,
          0.9382499993080273,
          0.8451000030618161
        ],
        "batch64_reps": [
          0.9211476562995813,
          1.3045390625165965,
          1.026230468767153
        ],
        "batch1_ms_per_window_median": 0.8451000030618161,
        "batch64_ms_per_window_median": 1.026230468767153
      },
      "full_onnx_fp32_reference": {
        "batch1_reps": [
          2.267249998112675,
          2.80170000041835,
          2.132149998942623
        ],
        "batch64_reps": [
          1.3050578124875756,
          1.4244992187855132,
          1.8014164062947202
        ],
        "batch1_ms_per_window_median": 2.267249998112675,
        "batch64_ms_per_window_median": 1.4244992187855132
      },
      "full_onnx_int8_static_percentile_conv_reference": {
        "batch1_reps": [
          5.529599999135826,
          4.768399998283712,
          6.215800000063609
        ],
        "batch64_reps": [
          3.815724218725336,
          3.1025562500417436,
          4.333318749957016
        ],
        "batch1_ms_per_window_median": 5.529599999135826,
        "batch64_ms_per_window_median": 3.815724218725336
      }
    },
    "accuracy_subset": {
      "description": "seed-42 file-level 70/15/15 test split, corrupted windows excluded, seed-42 random subset (same as quantize_bench/eval_ort_accuracy/static_ptq_bench)",
      "subset_size": 10000
    },
    "accuracy": {
      "tiny_onnx_fp32": {
        "samples": 10000,
        "pck@20": 0.941106667804718,
        "pck@50": 0.99369333152771,
        "mpjpe": 0.012527281279861927,
        "wall_seconds": 10.927234888076782
      },
      "tiny_onnx_int8_static_percentile_conv": {
        "samples": 10000,
        "pck@20": 0.9268133331298828,
        "pck@50": 0.9932933319091797,
        "mpjpe": 0.014906252065300942,
        "wall_seconds": 12.320892333984375
      }
    }
  }
}