{
  "generated_at": "2026-05-08T05:29:24.238915Z",
  "electricity_usd_per_kwh": 0.3148,
  "assumptions": {
    "amortization_24x7_hours": 26280,
    "amortization_40h_hours": 6240,
    "beastmode_note": "BeastMode cost-per-token and hourly models are lower-bound capital-only estimates because per-host power telemetry is not exposed cleanly through the ESXi management path used here."
  },
  "study": {
    "title": "Calculating the Total Cost of a GPU Cluster",
    "pricing_snapshot": "February 2026",
    "scenario_1_ratio": {
      "Nebius": 1.0,
      "AWS": 1.09,
      "Silver-tier": 1.08
    },
    "scenario_2_ratio": {
      "Nebius": 1.0,
      "AWS": 1.43,
      "Silver-tier": 1.08
    },
    "scenario_3_ratio": {
      "Nebius": 1.0,
      "AWS": 2.13,
      "Silver-tier": 1.04
    },
    "scenario_3_note": "The study attributes the AWS premium in inference to GPU instance price, support, storage, and setup overhead even after assuming a large discount off p5en list price."
  },
  "local": {
    "mac_mini": {
      "label": "Mac mini",
      "price_usd": 1609.0,
      "price_note": "Apple Certified Refurbished current-market anchor for an M4 Pro / 48GB / 512GB class system; the cited Apple listing uses 10GbE, which slightly overstates this exact box.",
      "power_w": 155,
      "power_note": "Apple maximum continuous power from current Mac mini technical specifications.",
      "specs": "Apple M4 Pro, 12 CPU cores (8P+4E), 48GB unified memory, 273 GB/s memory bandwidth.",
      "sample_model": "qwen2.5:7b",
      "sample_tokens_per_s": 47.76560945427192,
      "sample_prompt_tokens": 52,
      "sample_output_tokens": 20,
      "sample_total_s": 3.953212584,
      "interactive_rps": 0.265,
      "interactive_note": "Observed at concurrency 4 in the controlled text-capacity benchmark."
    },
    "dgx_spark": {
      "label": "DGX Spark",
      "price_usd": 4699.0,
      "price_note": "Current NVIDIA Marketplace listed price for DGX Spark bundle.",
      "power_w": 240,
      "power_note": "Conservative upper bound using the bundled 240W external power supply; actual sustained draw is often lower.",
      "specs": "NVIDIA GB10 Grace Blackwell Superchip, up to 1 PFLOP FP4, 128GB unified memory, 20-core Arm CPU, 273 GB/s memory bandwidth.",
      "sample_model": "qwen3:8b",
      "sample_tokens_per_s": 41.974065544911795,
      "sample_prompt_tokens": 39,
      "sample_output_tokens": 29,
      "sample_total_s": 2.735895899,
      "interactive_rps": 0.135,
      "interactive_note": "Observed at concurrency 2 in the controlled text-capacity benchmark comfort band."
    }
  },
  "beastmode": {
    "chewbacuh": {
      "label": "Chewbacuh",
      "host_price_usd": 977.0,
      "host_price_note": "Current Newegg market listing for a similar R730xd with 2x E5-2620 v3 and 128GB RAM; host share is conservative because the lane uses only part of the box.",
      "share_fraction": 0.3333333333333333,
      "vm_vcpu": 8,
      "vm_ram_gib": 48,
      "sample_model": "qwen3:8b",
      "sample_tokens_per_s": 3.316102255849126,
      "benchmark_rps": 0.098,
      "benchmark_note": "Observed plateau throughput across concurrency 1-4."
    },
    "lil_beastly": {
      "label": "LiL-Beastly",
      "host_price_usd": 1339.7,
      "host_price_note": "Current TechMikeNY market listing for a 512GB R730xd-class system; lane share priced by vCPU fraction because the host is shared infrastructure.",
      "share_fraction": 0.5,
      "vm_vcpu": 12,
      "vm_ram_gib": 96,
      "sample_model": "qwen3:14b",
      "sample_tokens_per_s": 1.776796715325552,
      "benchmark_rps": 0.051,
      "benchmark_note": "Observed plateau throughput across concurrency 1-4."
    }
  },
  "cloud": {
    "runpod_l4": {
      "label": "Runpod L4",
      "hourly_usd": 0.39,
      "tier": "Budget inference GPU",
      "spec_note": "24GB L4 on Secure Cloud, on-demand starting price."
    },
    "nebius_l40s": {
      "label": "Nebius L40S",
      "hourly_usd": 1.35,
      "tier": "Mid-range enterprise GPU",
      "spec_note": "Per-GPU price; CPU and RAM are additional on L40S VM pricing."
    },
    "nebius_h200": {
      "label": "Nebius H200",
      "hourly_usd": 3.5,
      "tier": "Premium inference GPU",
      "spec_note": "Unified per-GPU hour price including vCPU and RAM on H200 VMs."
    },
    "aws_p5en_h200": {
      "label": "AWS p5en H200",
      "hourly_usd": 5.721,
      "tier": "Hyperscaler premium GPU",
      "spec_note": "Effective per-accelerator hourly rate from the current Capacity Blocks for ML pricing table."
    },
    "nebius_cpu": {
      "label": "Nebius CPU-only equivalent",
      "cpu_hour_usd": 0.012,
      "ram_gibhour_usd": 0.0032,
      "tier": "CPU cloud baseline",
      "spec_note": "Non-GPU AMD EPYC Genoa VM pricing."
    }
  },
  "sources": [
    {
      "label": "SemiAnalysis / Nebius study PDF used for TCO context",
      "url": "file:///Users/nation/Downloads/nebius-semianalysis-real-cost-of-gpu-clusters.pdf",
      "note": "Local study provided by user; pricing snapshot in the study is February 2026."
    },
    {
      "label": "Apple Certified Refurbished Mac mini (M4 Pro / 48GB / 512GB / 10GbE)",
      "url": "https://www.apple.com/shop/product/g1kzmll/a/Refurbished-Mac-mini-Apple-M4-Pro-Chip-with-12-Core-CPU-and-16-Core-GPU-10Gb-Ethernet-",
      "note": "Used as a current-market anchor for the Mac mini capital cost."
    },
    {
      "label": "Apple Mac mini technical specifications",
      "url": "https://www.apple.com/mac-mini/specs/",
      "note": "Used for M4 Pro architecture details and maximum continuous power."
    },
    {
      "label": "NVIDIA DGX Spark marketplace listing",
      "url": "https://marketplace.nvidia.com/en-us/enterprise/personal-ai-supercomputers/dgx-spark/",
      "note": "Used for current published DGX Spark price."
    },
    {
      "label": "NVIDIA DGX Spark hardware overview",
      "url": "https://docs.nvidia.com/dgx/dgx-spark/hardware.html",
      "note": "Used for DGX Spark system specs and power envelope."
    },
    {
      "label": "Runpod L4 cloud pricing",
      "url": "https://www.runpod.io/gpu-models/l4",
      "note": "Used for budget L4 cloud baseline."
    },
    {
      "label": "Nebius AI Cloud compute pricing",
      "url": "https://docs.nebius.com/compute/resources/pricing",
      "note": "Used for L40S, H200, and CPU-only cloud baselines."
    },
    {
      "label": "AWS EC2 Capacity Blocks for ML pricing",
      "url": "https://aws.amazon.com/ec2/capacityblocks/pricing/",
      "note": "Used for p5en H200 per-accelerator pricing."
    },
    {
      "label": "U.S. EIA electricity price table, California YTD through Feb 2026",
      "url": "https://www.eia.gov/electricity/monthly/epm_table_grapher.php?t=epmt_5_06_b",
      "note": "Used for local power-cost assumption of $0.3148/kWh."
    },
    {
      "label": "Newegg Dell R730xd market listing",
      "url": "https://www.newegg.com/dell-poweredge-r730xd-rack/p/2NS-0008-703C5",
      "note": "Used as the vm1/chewbacuh host replacement-cost anchor."
    },
    {
      "label": "TechMikeNY Dell R730xd market listing",
      "url": "https://techmikeny.com/products/dell-poweredge-r730xd-server-24-bay-2-20ghz-40-core-512gb-ram-26x-caddies",
      "note": "Used as the vm2/LiL-Beastly host replacement-cost anchor."
    }
  ],
  "computed": {
    "local_rows": [
      {
        "id": "mac_mini",
        "label": "Mac mini",
        "hourly_24x7": 0.11001926636225268,
        "hourly_40h": 0.30664656410256413,
        "capex_24x7": 0.061225266362252666,
        "capex_40h": 0.25785256410256413,
        "tokens_per_s": 47.76560945427192,
        "million_tokens_per_hour": 0.1719561940353789,
        "cost_per_mtok_24x7": 0.6398098479640513,
        "cost_per_mtok_40h": 1.7832830380013733
      },
      {
        "id": "dgx_spark",
        "label": "DGX Spark",
        "hourly_24x7": 0.2543571750380518,
        "hourly_40h": 0.8285968717948717,
        "capex_24x7": 0.17880517503805174,
        "capex_40h": 0.7530448717948718,
        "tokens_per_s": 41.974065544911795,
        "million_tokens_per_hour": 0.15110663596168245,
        "cost_per_mtok_24x7": 1.6832958620199285,
        "cost_per_mtok_40h": 5.483524045925997
      }
    ],
    "beast_rows": [
      {
        "id": "chewbacuh",
        "label": "Chewbacuh",
        "lane_capex_usd": 325.66666666666663,
        "capex_24x7": 0.012392186707255198,
        "capex_40h": 0.052190170940170935,
        "tokens_per_s": 3.316102255849126,
        "million_tokens_per_hour": 0.011937968121056854,
        "cost_per_mtok_24x7": 1.038048232462371,
        "cost_per_mtok_40h": 4.371780055947293,
        "equivalent_cloud_hourly": 0.24960000000000002,
        "breakeven_hours_vs_cpu_cloud": 1304.7542735042732
      },
      {
        "id": "lil_beastly",
        "label": "LiL-Beastly",
        "lane_capex_usd": 669.85,
        "capex_24x7": 0.025488964992389652,
        "capex_40h": 0.10734775641025641,
        "tokens_per_s": 1.776796715325552,
        "million_tokens_per_hour": 0.006396468175171987,
        "cost_per_mtok_24x7": 3.984849809981945,
        "cost_per_mtok_40h": 16.78234823819319,
        "equivalent_cloud_hourly": 0.45120000000000005,
        "breakeven_hours_vs_cpu_cloud": 1484.5966312056737
      }
    ],
    "pdf": "committee-review-cloud-economics-20260507.pdf",
    "charts": [
      "beastmode-cpu-cloud.png",
      "breakeven-days.png",
      "local-token-cost.png",
      "local-vs-cloud-hourly.png"
    ]
  }
}