{
  "mac_mini": {
    "warm_results": [
      {
        "request_id": "warm-mac_mini-1",
        "model": "qwen2.5:7b",
        "ok": true,
        "started_at": 1778137876.7394822,
        "latency_s": 1.569229250017088,
        "finish_reason": "stop",
        "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
        "prompt_tokens": 77,
        "completion_tokens": 24,
        "total_tokens": 101
      },
      {
        "request_id": "warm-mac_mini-2",
        "model": "qwen3:30b",
        "ok": true,
        "started_at": 1778137878.3087842,
        "latency_s": 12.374412957986351,
        "finish_reason": "length",
        "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
        "prompt_tokens": 79,
        "completion_tokens": 96,
        "total_tokens": 175
      },
      {
        "request_id": "warm-mac_mini-3",
        "model": "nemotron-3-nano:30b",
        "ok": true,
        "started_at": 1778137890.6834922,
        "latency_s": 17.03599041700363,
        "finish_reason": "length",
        "content_preview": "- **Batch & pipeline requests** \u2013 group incoming messages and process them in parallel pipelines to amortize GPU kernel ",
        "prompt_tokens": 83,
        "completion_tokens": 96,
        "total_tokens": 179
      }
    ],
    "steps": [
      {
        "summary": {
          "concurrency": 1,
          "total_requests": 9,
          "request_count": 9,
          "success_count": 9,
          "error_count": 0,
          "error_rate": 0.0,
          "wall_s": 87.45133516698843,
          "throughput_rps": 0.10291438069887085,
          "latency_avg_s": 9.716113037099907,
          "latency_p50_s": 10.071201917016879,
          "latency_p95_s": 13.570814591576346,
          "latency_max_s": 14.102356791961938,
          "completion_tps": 7.60422924052768,
          "sample_errors": [],
          "peak_cpu_busy_pct": 67.17999999999999,
          "peak_load1": 7.07,
          "min_memory_free_pct": 15.0,
          "thermal_warning_seen": false,
          "performance_warning_seen": false,
          "thermal_status_peak": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded"
        },
        "requests": [
          {
            "request_id": "mac_mini-1-1",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778137907.9312792,
            "latency_s": 5.249839291966055,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency: Reduce latency with model quantization or pruning.\n- Implement load balancing: Distribute i",
            "prompt_tokens": 77,
            "completion_tokens": 41,
            "total_tokens": 118
          },
          {
            "request_id": "mac_mini-1-2",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778137913.181327,
            "latency_s": 9.296648624993395,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster and wants three short operational tip",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-1-3",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778137922.478168,
            "latency_s": 12.773501290997956,
            "finish_reason": "length",
            "content_preview": "- **Batch requests & use async I/O** \u2013 Process multiple user messages in parallel pipelines to avoid blocking a single t",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-1-4",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778137935.251866,
            "latency_s": 5.382027915969957,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-1-5",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778137940.634131,
            "latency_s": 10.071201917016879,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-1-6",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778137950.705482,
            "latency_s": 14.102356791961938,
            "finish_reason": "length",
            "content_preview": "- **Scale horizontally with auto\u2011scale groups** \u2013 add instances when request rate spikes, and keep them warm to avoid co",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-1-7",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778137964.8080168,
            "latency_s": 10.131589416996576,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-1-8",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778137974.939845,
            "latency_s": 8.941197249980178,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-1-9",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778137983.8811882,
            "latency_s": 11.496654834016226,
            "finish_reason": "length",
            "content_preview": "- **",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          }
        ],
        "samples": [
          {
            "timestamp": 1778137907.7199202,
            "unit": "mac_mini",
            "load1": 5.83,
            "load5": 5.79,
            "load15": 4.81,
            "cpu_user_pct": 21.45,
            "cpu_sys_pct": 12.87,
            "cpu_idle_pct": 65.66,
            "cpu_busy_pct": 34.32,
            "phys_mem_used": "47G",
            "phys_mem_unused": "511M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137910.970124,
            "unit": "mac_mini",
            "load1": 5.68,
            "load5": 5.76,
            "load15": 4.81,
            "cpu_user_pct": 22.97,
            "cpu_sys_pct": 17.44,
            "cpu_idle_pct": 59.57,
            "cpu_busy_pct": 40.41,
            "phys_mem_used": "47G",
            "phys_mem_unused": "283M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137914.271209,
            "unit": "mac_mini",
            "load1": 5.68,
            "load5": 5.76,
            "load15": 4.81,
            "cpu_user_pct": 21.79,
            "cpu_sys_pct": 15.81,
            "cpu_idle_pct": 62.39,
            "cpu_busy_pct": 37.6,
            "phys_mem_used": "41G",
            "phys_mem_unused": "6529M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137917.521121,
            "unit": "mac_mini",
            "load1": 5.47,
            "load5": 5.72,
            "load15": 4.8,
            "cpu_user_pct": 36.75,
            "cpu_sys_pct": 22.22,
            "cpu_idle_pct": 41.2,
            "cpu_busy_pct": 58.97,
            "phys_mem_used": "44G",
            "phys_mem_unused": "3340M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137920.772452,
            "unit": "mac_mini",
            "load1": 5.59,
            "load5": 5.74,
            "load15": 4.81,
            "cpu_user_pct": 26.49,
            "cpu_sys_pct": 19.23,
            "cpu_idle_pct": 54.27,
            "cpu_busy_pct": 45.72,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1502M",
            "memory_free_pct": 36.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137924.030206,
            "unit": "mac_mini",
            "load1": 5.59,
            "load5": 5.74,
            "load15": 4.81,
            "cpu_user_pct": 19.91,
            "cpu_sys_pct": 17.84,
            "cpu_idle_pct": 62.24,
            "cpu_busy_pct": 37.75,
            "phys_mem_used": "47G",
            "phys_mem_unused": "595M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137927.293612,
            "unit": "mac_mini",
            "load1": 5.54,
            "load5": 5.73,
            "load15": 4.81,
            "cpu_user_pct": 22.4,
            "cpu_sys_pct": 18.67,
            "cpu_idle_pct": 58.92,
            "cpu_busy_pct": 41.07,
            "phys_mem_used": "47G",
            "phys_mem_unused": "609M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137930.5966232,
            "unit": "mac_mini",
            "load1": 5.5,
            "load5": 5.71,
            "load15": 4.81,
            "cpu_user_pct": 22.78,
            "cpu_sys_pct": 18.14,
            "cpu_idle_pct": 59.7,
            "cpu_busy_pct": 40.92,
            "phys_mem_used": "47G",
            "phys_mem_unused": "645M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137933.860318,
            "unit": "mac_mini",
            "load1": 5.5,
            "load5": 5.71,
            "load15": 4.81,
            "cpu_user_pct": 15.25,
            "cpu_sys_pct": 11.86,
            "cpu_idle_pct": 72.88,
            "cpu_busy_pct": 27.11,
            "phys_mem_used": "47G",
            "phys_mem_unused": "647M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137937.116744,
            "unit": "mac_mini",
            "load1": 5.14,
            "load5": 5.64,
            "load15": 4.79,
            "cpu_user_pct": 13.98,
            "cpu_sys_pct": 20.33,
            "cpu_idle_pct": 65.67,
            "cpu_busy_pct": 34.31,
            "phys_mem_used": "47G",
            "phys_mem_unused": "425M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137940.395255,
            "unit": "mac_mini",
            "load1": 5.14,
            "load5": 5.64,
            "load15": 4.79,
            "cpu_user_pct": 17.91,
            "cpu_sys_pct": 14.58,
            "cpu_idle_pct": 67.5,
            "cpu_busy_pct": 32.49,
            "phys_mem_used": "47G",
            "phys_mem_unused": "378M",
            "memory_free_pct": 15.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137943.657977,
            "unit": "mac_mini",
            "load1": 5.13,
            "load5": 5.62,
            "load15": 4.79,
            "cpu_user_pct": 12.29,
            "cpu_sys_pct": 15.98,
            "cpu_idle_pct": 71.72,
            "cpu_busy_pct": 28.27,
            "phys_mem_used": "43G",
            "phys_mem_unused": "4453M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137946.9358552,
            "unit": "mac_mini",
            "load1": 4.8,
            "load5": 5.55,
            "load15": 4.77,
            "cpu_user_pct": 40.62,
            "cpu_sys_pct": 26.56,
            "cpu_idle_pct": 32.81,
            "cpu_busy_pct": 67.17999999999999,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1135M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137950.221809,
            "unit": "mac_mini",
            "load1": 4.8,
            "load5": 5.55,
            "load15": 4.77,
            "cpu_user_pct": 31.55,
            "cpu_sys_pct": 15.16,
            "cpu_idle_pct": 53.27,
            "cpu_busy_pct": 46.71,
            "phys_mem_used": "47G",
            "phys_mem_unused": "890M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137953.485321,
            "unit": "mac_mini",
            "load1": 5.05,
            "load5": 5.59,
            "load15": 4.79,
            "cpu_user_pct": 16.0,
            "cpu_sys_pct": 18.0,
            "cpu_idle_pct": 66.0,
            "cpu_busy_pct": 34.0,
            "phys_mem_used": "47G",
            "phys_mem_unused": "607M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137956.7459872,
            "unit": "mac_mini",
            "load1": 5.05,
            "load5": 5.58,
            "load15": 4.79,
            "cpu_user_pct": 19.91,
            "cpu_sys_pct": 17.42,
            "cpu_idle_pct": 62.65,
            "cpu_busy_pct": 37.33,
            "phys_mem_used": "47G",
            "phys_mem_unused": "627M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137960.01851,
            "unit": "mac_mini",
            "load1": 5.05,
            "load5": 5.58,
            "load15": 4.79,
            "cpu_user_pct": 20.81,
            "cpu_sys_pct": 15.91,
            "cpu_idle_pct": 63.26,
            "cpu_busy_pct": 36.72,
            "phys_mem_used": "47G",
            "phys_mem_unused": "682M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137963.2841558,
            "unit": "mac_mini",
            "load1": 5.04,
            "load5": 5.57,
            "load15": 4.79,
            "cpu_user_pct": 23.93,
            "cpu_sys_pct": 14.52,
            "cpu_idle_pct": 61.53,
            "cpu_busy_pct": 38.45,
            "phys_mem_used": "47G",
            "phys_mem_unused": "551M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137966.539916,
            "unit": "mac_mini",
            "load1": 5.6,
            "load5": 5.67,
            "load15": 4.83,
            "cpu_user_pct": 17.62,
            "cpu_sys_pct": 23.77,
            "cpu_idle_pct": 58.6,
            "cpu_busy_pct": 41.39,
            "phys_mem_used": "47G",
            "phys_mem_unused": "335M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137969.891623,
            "unit": "mac_mini",
            "load1": 5.6,
            "load5": 5.67,
            "load15": 4.83,
            "cpu_user_pct": 23.36,
            "cpu_sys_pct": 24.59,
            "cpu_idle_pct": 52.4,
            "cpu_busy_pct": 47.95,
            "phys_mem_used": "47G",
            "phys_mem_unused": "519M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137973.149695,
            "unit": "mac_mini",
            "load1": 5.79,
            "load5": 5.71,
            "load15": 4.85,
            "cpu_user_pct": 25.1,
            "cpu_sys_pct": 20.85,
            "cpu_idle_pct": 54.4,
            "cpu_busy_pct": 45.95,
            "phys_mem_used": "47G",
            "phys_mem_unused": "357M",
            "memory_free_pct": 23.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137976.3976161,
            "unit": "mac_mini",
            "load1": 6.29,
            "load5": 5.82,
            "load15": 4.89,
            "cpu_user_pct": 27.8,
            "cpu_sys_pct": 21.25,
            "cpu_idle_pct": 51.66,
            "cpu_busy_pct": 49.05,
            "phys_mem_used": "41G",
            "phys_mem_unused": "6497M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137979.655377,
            "unit": "mac_mini",
            "load1": 6.29,
            "load5": 5.82,
            "load15": 4.89,
            "cpu_user_pct": 21.55,
            "cpu_sys_pct": 18.53,
            "cpu_idle_pct": 59.91,
            "cpu_busy_pct": 40.08,
            "phys_mem_used": "44G",
            "phys_mem_unused": "3498M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137982.943807,
            "unit": "mac_mini",
            "load1": 7.07,
            "load5": 5.99,
            "load15": 4.96,
            "cpu_user_pct": 36.75,
            "cpu_sys_pct": 17.9,
            "cpu_idle_pct": 46.15,
            "cpu_busy_pct": 54.65,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1243M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137986.194933,
            "unit": "mac_mini",
            "load1": 6.74,
            "load5": 5.94,
            "load15": 4.94,
            "cpu_user_pct": 19.4,
            "cpu_sys_pct": 18.14,
            "cpu_idle_pct": 62.44,
            "cpu_busy_pct": 37.54,
            "phys_mem_used": "47G",
            "phys_mem_unused": "674M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137989.4540539,
            "unit": "mac_mini",
            "load1": 6.74,
            "load5": 5.94,
            "load15": 4.94,
            "cpu_user_pct": 20.25,
            "cpu_sys_pct": 18.14,
            "cpu_idle_pct": 61.6,
            "cpu_busy_pct": 38.39,
            "phys_mem_used": "47G",
            "phys_mem_unused": "648M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137992.713684,
            "unit": "mac_mini",
            "load1": 6.36,
            "load5": 5.87,
            "load15": 4.93,
            "cpu_user_pct": 21.0,
            "cpu_sys_pct": 13.86,
            "cpu_idle_pct": 65.12,
            "cpu_busy_pct": 34.86,
            "phys_mem_used": "47G",
            "phys_mem_unused": "595M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778137995.583227,
            "unit": "mac_mini",
            "load1": 6.01,
            "load5": 5.81,
            "load15": 4.91,
            "cpu_user_pct": 24.3,
            "cpu_sys_pct": 15.87,
            "cpu_idle_pct": 60.8,
            "cpu_busy_pct": 40.17,
            "phys_mem_used": "47G",
            "phys_mem_unused": "399M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          }
        ]
      },
      {
        "summary": {
          "concurrency": 2,
          "total_requests": 9,
          "request_count": 9,
          "success_count": 9,
          "error_count": 0,
          "error_rate": 0.0,
          "wall_s": 73.10480725002708,
          "throughput_rps": 0.12311091894708014,
          "latency_avg_s": 14.896224333322607,
          "latency_p50_s": 13.752180750016123,
          "latency_p95_s": 21.623434282978998,
          "latency_max_s": 21.790378832956776,
          "completion_tps": 8.97341809214273,
          "sample_errors": [],
          "peak_cpu_busy_pct": 64.6,
          "peak_load1": 6.68,
          "min_memory_free_pct": 13.0,
          "thermal_warning_seen": false,
          "performance_warning_seen": false,
          "thermal_status_peak": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded"
        },
        "requests": [
          {
            "request_id": "mac_mini-2-1",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778137998.034049,
            "latency_s": 4.703316375031136,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-2-2",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778137998.0342631,
            "latency_s": 13.336477916978765,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They need three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-2-3",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138002.737546,
            "latency_s": 21.082235333975405,
            "finish_reason": "length",
            "content_preview": "- **Batch requests & use async I/O** \u2013 Process multiple incoming messages in parallel and return responses as soon as ea",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-2-4",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138011.370941,
            "latency_s": 13.752180750016123,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-2-5",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138023.8200588,
            "latency_s": 10.15448537498014,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-2-6",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138025.123323,
            "latency_s": 21.790378832956776,
            "finish_reason": "length",
            "content_preview": "- **Batch requests**: Group incoming messages and process them in parallel batches to maximize GPU utilization.  \n- **Li",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-2-7",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138033.97474,
            "latency_s": 15.790515417000279,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency: Reduce inference latency.\n- Implement load balancing: Distribute traffic evenly.\n- Use cach",
            "prompt_tokens": 77,
            "completion_tokens": 32,
            "total_tokens": 109
          },
          {
            "request_id": "mac_mini-2-8",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138046.9139168,
            "latency_s": 12.083411540952511,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-2-9",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138049.765472,
            "latency_s": 21.373017458012328,
            "finish_reason": "length",
            "content_preview": "- **Batch requests & use async I/O** \u2013 Process multiple incoming messages in parallel and avoid blocking calls.  \n- **Sc",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          }
        ],
        "samples": [
          {
            "timestamp": 1778137997.8285959,
            "unit": "mac_mini",
            "load1": 6.01,
            "load5": 5.81,
            "load15": 4.91,
            "cpu_user_pct": 19.6,
            "cpu_sys_pct": 11.86,
            "cpu_idle_pct": 69.6,
            "cpu_busy_pct": 31.46,
            "phys_mem_used": "47G",
            "phys_mem_unused": "339M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138001.0758998,
            "unit": "mac_mini",
            "load1": 5.85,
            "load5": 5.78,
            "load15": 4.9,
            "cpu_user_pct": 16.86,
            "cpu_sys_pct": 16.47,
            "cpu_idle_pct": 66.66,
            "cpu_busy_pct": 33.33,
            "phys_mem_used": "47G",
            "phys_mem_unused": "198M",
            "memory_free_pct": 17.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138004.3436449,
            "unit": "mac_mini",
            "load1": 5.85,
            "load5": 5.78,
            "load15": 4.9,
            "cpu_user_pct": 17.37,
            "cpu_sys_pct": 19.49,
            "cpu_idle_pct": 63.13,
            "cpu_busy_pct": 36.86,
            "phys_mem_used": "44G",
            "phys_mem_unused": "3675M",
            "memory_free_pct": 25.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138007.598489,
            "unit": "mac_mini",
            "load1": 6.02,
            "load5": 5.81,
            "load15": 4.92,
            "cpu_user_pct": 20.76,
            "cpu_sys_pct": 19.6,
            "cpu_idle_pct": 60.16,
            "cpu_busy_pct": 40.36,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1647M",
            "memory_free_pct": 25.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138010.859754,
            "unit": "mac_mini",
            "load1": 5.94,
            "load5": 5.8,
            "load15": 4.92,
            "cpu_user_pct": 31.38,
            "cpu_sys_pct": 18.82,
            "cpu_idle_pct": 49.79,
            "cpu_busy_pct": 50.2,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1538M",
            "memory_free_pct": 36.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138014.117068,
            "unit": "mac_mini",
            "load1": 5.94,
            "load5": 5.8,
            "load15": 4.92,
            "cpu_user_pct": 15.98,
            "cpu_sys_pct": 18.3,
            "cpu_idle_pct": 65.98,
            "cpu_busy_pct": 34.28,
            "phys_mem_used": "47G",
            "phys_mem_unused": "611M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138017.381909,
            "unit": "mac_mini",
            "load1": 5.7,
            "load5": 5.75,
            "load15": 4.91,
            "cpu_user_pct": 24.58,
            "cpu_sys_pct": 17.5,
            "cpu_idle_pct": 57.91,
            "cpu_busy_pct": 42.08,
            "phys_mem_used": "47G",
            "phys_mem_unused": "632M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138020.6441731,
            "unit": "mac_mini",
            "load1": 5.65,
            "load5": 5.74,
            "load15": 4.91,
            "cpu_user_pct": 17.8,
            "cpu_sys_pct": 16.66,
            "cpu_idle_pct": 66.25,
            "cpu_busy_pct": 34.46,
            "phys_mem_used": "47G",
            "phys_mem_unused": "645M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138023.906967,
            "unit": "mac_mini",
            "load1": 5.65,
            "load5": 5.74,
            "load15": 4.91,
            "cpu_user_pct": 21.7,
            "cpu_sys_pct": 20.0,
            "cpu_idle_pct": 58.29,
            "cpu_busy_pct": 41.7,
            "phys_mem_used": "47G",
            "phys_mem_unused": "297M",
            "memory_free_pct": 13.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138027.163885,
            "unit": "mac_mini",
            "load1": 5.52,
            "load5": 5.71,
            "load15": 4.91,
            "cpu_user_pct": 19.24,
            "cpu_sys_pct": 18.82,
            "cpu_idle_pct": 61.92,
            "cpu_busy_pct": 38.06,
            "phys_mem_used": "44G",
            "phys_mem_unused": "3502M",
            "memory_free_pct": 25.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138030.429329,
            "unit": "mac_mini",
            "load1": 5.52,
            "load5": 5.71,
            "load15": 4.91,
            "cpu_user_pct": 24.89,
            "cpu_sys_pct": 25.72,
            "cpu_idle_pct": 49.37,
            "cpu_busy_pct": 50.61,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1051M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138033.701418,
            "unit": "mac_mini",
            "load1": 6.68,
            "load5": 5.95,
            "load15": 4.99,
            "cpu_user_pct": 27.7,
            "cpu_sys_pct": 18.77,
            "cpu_idle_pct": 54.14,
            "cpu_busy_pct": 46.47,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1525M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138036.951359,
            "unit": "mac_mini",
            "load1": 6.3,
            "load5": 5.88,
            "load15": 4.98,
            "cpu_user_pct": 20.83,
            "cpu_sys_pct": 21.66,
            "cpu_idle_pct": 57.5,
            "cpu_busy_pct": 42.489999999999995,
            "phys_mem_used": "47G",
            "phys_mem_unused": "569M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138040.210894,
            "unit": "mac_mini",
            "load1": 6.3,
            "load5": 5.88,
            "load15": 4.98,
            "cpu_user_pct": 15.31,
            "cpu_sys_pct": 21.27,
            "cpu_idle_pct": 63.4,
            "cpu_busy_pct": 36.58,
            "phys_mem_used": "47G",
            "phys_mem_unused": "654M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138043.468818,
            "unit": "mac_mini",
            "load1": 6.12,
            "load5": 5.85,
            "load15": 4.97,
            "cpu_user_pct": 35.84,
            "cpu_sys_pct": 20.37,
            "cpu_idle_pct": 43.77,
            "cpu_busy_pct": 56.21000000000001,
            "phys_mem_used": "47G",
            "phys_mem_unused": "499M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138046.7516859,
            "unit": "mac_mini",
            "load1": 6.35,
            "load5": 5.9,
            "load15": 4.99,
            "cpu_user_pct": 24.47,
            "cpu_sys_pct": 19.83,
            "cpu_idle_pct": 55.69,
            "cpu_busy_pct": 44.3,
            "phys_mem_used": "47G",
            "phys_mem_unused": "614M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138050.0123498,
            "unit": "mac_mini",
            "load1": 6.35,
            "load5": 5.9,
            "load15": 4.99,
            "cpu_user_pct": 41.5,
            "cpu_sys_pct": 23.1,
            "cpu_idle_pct": 35.47,
            "cpu_busy_pct": 64.6,
            "phys_mem_used": "43G",
            "phys_mem_unused": "4213M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138053.352143,
            "unit": "mac_mini",
            "load1": 6.48,
            "load5": 5.94,
            "load15": 5.01,
            "cpu_user_pct": 19.75,
            "cpu_sys_pct": 17.28,
            "cpu_idle_pct": 62.96,
            "cpu_busy_pct": 37.03,
            "phys_mem_used": "45G",
            "phys_mem_unused": "2570M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138056.611943,
            "unit": "mac_mini",
            "load1": 6.28,
            "load5": 5.9,
            "load15": 5.01,
            "cpu_user_pct": 18.58,
            "cpu_sys_pct": 16.37,
            "cpu_idle_pct": 65.4,
            "cpu_busy_pct": 34.95,
            "phys_mem_used": "47G",
            "phys_mem_unused": "599M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138059.867125,
            "unit": "mac_mini",
            "load1": 6.28,
            "load5": 5.9,
            "load15": 5.01,
            "cpu_user_pct": 16.94,
            "cpu_sys_pct": 19.83,
            "cpu_idle_pct": 63.22,
            "cpu_busy_pct": 36.769999999999996,
            "phys_mem_used": "47G",
            "phys_mem_unused": "659M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138063.13375,
            "unit": "mac_mini",
            "load1": 6.02,
            "load5": 5.86,
            "load15": 4.99,
            "cpu_user_pct": 17.64,
            "cpu_sys_pct": 16.8,
            "cpu_idle_pct": 65.54,
            "cpu_busy_pct": 34.44,
            "phys_mem_used": "47G",
            "phys_mem_unused": "531M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138066.3920882,
            "unit": "mac_mini",
            "load1": 6.02,
            "load5": 5.86,
            "load15": 5.0,
            "cpu_user_pct": 17.28,
            "cpu_sys_pct": 18.1,
            "cpu_idle_pct": 64.6,
            "cpu_busy_pct": 35.38,
            "phys_mem_used": "47G",
            "phys_mem_unused": "610M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138069.653284,
            "unit": "mac_mini",
            "load1": 6.02,
            "load5": 5.86,
            "load15": 5.0,
            "cpu_user_pct": 24.59,
            "cpu_sys_pct": 15.57,
            "cpu_idle_pct": 59.83,
            "cpu_busy_pct": 40.16,
            "phys_mem_used": "47G",
            "phys_mem_unused": "572M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138071.343934,
            "unit": "mac_mini",
            "load1": 5.69,
            "load5": 5.79,
            "load15": 4.98,
            "cpu_user_pct": 28.63,
            "cpu_sys_pct": 14.52,
            "cpu_idle_pct": 56.84,
            "cpu_busy_pct": 43.15,
            "phys_mem_used": "47G",
            "phys_mem_unused": "613M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          }
        ]
      },
      {
        "summary": {
          "concurrency": 3,
          "total_requests": 12,
          "request_count": 12,
          "success_count": 12,
          "error_count": 0,
          "error_rate": 0.0,
          "wall_s": 53.471846125030424,
          "throughput_rps": 0.2244171628550289,
          "latency_avg_s": 11.224783972332565,
          "latency_p50_s": 10.994812833494507,
          "latency_p95_s": 23.537262060036298,
          "latency_max_s": 25.672283249965403,
          "completion_tps": 16.158035725562083,
          "sample_errors": [],
          "peak_cpu_busy_pct": 55.35,
          "peak_load1": 7.28,
          "min_memory_free_pct": 14.0,
          "thermal_warning_seen": false,
          "performance_warning_seen": false,
          "thermal_status_peak": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded"
        },
        "requests": [
          {
            "request_id": "mac_mini-3-3",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138073.817645,
            "latency_s": 2.631130416993983,
            "finish_reason": "length",
            "content_preview": "- **Batch requests & use async I/O** \u2013 Process multiple incoming messages in parallel rather than one\u2011by\u2011one.  \n- **Cach",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-3-1",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138073.81736,
            "latency_s": 4.251829000015277,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-3-4",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138076.448983,
            "latency_s": 2.189707166980952,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-3-2",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138073.81756,
            "latency_s": 12.534756667038891,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-3-5",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138078.06947,
            "latency_s": 9.89541337499395,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing concurrent chat capacity for a small AI inference cluster. They need three short operational t",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-3-6",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138078.6387699,
            "latency_s": 21.7904265410034,
            "finish_reason": "length",
            "content_preview": "- **Batch requests**: Group incoming messages and process them in small, fixed-size batches to amortize model load and r",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-3-7",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138086.352475,
            "latency_s": 15.981095666997135,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-3-9",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138100.4293902,
            "latency_s": 3.273709000030067,
            "finish_reason": "length",
            "content_preview": "- **Batch requests**: Group incoming queries and process them in small batches to amortize overhead.  \n- **Scale horizon",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-3-10",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138102.333764,
            "latency_s": 10.467277624993585,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-3-8",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138087.9650261,
            "latency_s": 25.672283249965403,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-3-11",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138103.7032828,
            "latency_s": 11.522348041995429,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-3-12",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138112.801205,
            "latency_s": 14.487430916982703,
            "finish_reason": "length",
            "content_preview": "- **Batch requests & use async I/O** \u2013 Process multiple incoming messages in parallel and avoid blocking calls.  \n- **Sc",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          }
        ],
        "samples": [
          {
            "timestamp": 1778138073.611927,
            "unit": "mac_mini",
            "load1": 5.69,
            "load5": 5.79,
            "load15": 4.98,
            "cpu_user_pct": 31.12,
            "cpu_sys_pct": 10.5,
            "cpu_idle_pct": 58.36,
            "cpu_busy_pct": 41.620000000000005,
            "phys_mem_used": "47G",
            "phys_mem_unused": "638M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138076.883889,
            "unit": "mac_mini",
            "load1": 5.48,
            "load5": 5.75,
            "load15": 4.97,
            "cpu_user_pct": 23.37,
            "cpu_sys_pct": 19.91,
            "cpu_idle_pct": 56.7,
            "cpu_busy_pct": 43.28,
            "phys_mem_used": "47G",
            "phys_mem_unused": "383M",
            "memory_free_pct": 14.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138080.137766,
            "unit": "mac_mini",
            "load1": 5.48,
            "load5": 5.75,
            "load15": 4.97,
            "cpu_user_pct": 22.31,
            "cpu_sys_pct": 16.11,
            "cpu_idle_pct": 61.57,
            "cpu_busy_pct": 38.42,
            "phys_mem_used": "44G",
            "phys_mem_unused": "3406M",
            "memory_free_pct": 25.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138083.396713,
            "unit": "mac_mini",
            "load1": 7.28,
            "load5": 6.12,
            "load15": 5.1,
            "cpu_user_pct": 21.61,
            "cpu_sys_pct": 20.33,
            "cpu_idle_pct": 58.5,
            "cpu_busy_pct": 41.94,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1235M",
            "memory_free_pct": 25.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138086.656096,
            "unit": "mac_mini",
            "load1": 7.02,
            "load5": 6.08,
            "load15": 5.1,
            "cpu_user_pct": 22.41,
            "cpu_sys_pct": 11.2,
            "cpu_idle_pct": 66.37,
            "cpu_busy_pct": 33.61,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1522M",
            "memory_free_pct": 36.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138089.903391,
            "unit": "mac_mini",
            "load1": 7.02,
            "load5": 6.08,
            "load15": 5.1,
            "cpu_user_pct": 18.6,
            "cpu_sys_pct": 20.58,
            "cpu_idle_pct": 61.34,
            "cpu_busy_pct": 39.18,
            "phys_mem_used": "47G",
            "phys_mem_unused": "617M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138093.160774,
            "unit": "mac_mini",
            "load1": 7.02,
            "load5": 6.1,
            "load15": 5.11,
            "cpu_user_pct": 20.8,
            "cpu_sys_pct": 19.65,
            "cpu_idle_pct": 60.25,
            "cpu_busy_pct": 40.45,
            "phys_mem_used": "47G",
            "phys_mem_unused": "649M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138096.418628,
            "unit": "mac_mini",
            "load1": 6.94,
            "load5": 6.09,
            "load15": 5.11,
            "cpu_user_pct": 19.82,
            "cpu_sys_pct": 18.1,
            "cpu_idle_pct": 62.6,
            "cpu_busy_pct": 37.92,
            "phys_mem_used": "47G",
            "phys_mem_unused": "585M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138099.675811,
            "unit": "mac_mini",
            "load1": 6.94,
            "load5": 6.09,
            "load15": 5.11,
            "cpu_user_pct": 20.6,
            "cpu_sys_pct": 15.45,
            "cpu_idle_pct": 63.94,
            "cpu_busy_pct": 36.05,
            "phys_mem_used": "47G",
            "phys_mem_unused": "293M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138102.9294121,
            "unit": "mac_mini",
            "load1": 6.62,
            "load5": 6.04,
            "load15": 5.1,
            "cpu_user_pct": 30.4,
            "cpu_sys_pct": 12.87,
            "cpu_idle_pct": 57.8,
            "cpu_busy_pct": 43.269999999999996,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1650M",
            "memory_free_pct": 25.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138106.174461,
            "unit": "mac_mini",
            "load1": 6.65,
            "load5": 6.06,
            "load15": 5.11,
            "cpu_user_pct": 23.5,
            "cpu_sys_pct": 21.79,
            "cpu_idle_pct": 54.7,
            "cpu_busy_pct": 45.29,
            "phys_mem_used": "43G",
            "phys_mem_unused": "4883M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138109.43077,
            "unit": "mac_mini",
            "load1": 6.65,
            "load5": 6.06,
            "load15": 5.11,
            "cpu_user_pct": 20.83,
            "cpu_sys_pct": 20.41,
            "cpu_idle_pct": 58.75,
            "cpu_busy_pct": 41.239999999999995,
            "phys_mem_used": "45G",
            "phys_mem_unused": "2527M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138112.689749,
            "unit": "mac_mini",
            "load1": 6.36,
            "load5": 6.01,
            "load15": 5.1,
            "cpu_user_pct": 40.59,
            "cpu_sys_pct": 14.76,
            "cpu_idle_pct": 44.64,
            "cpu_busy_pct": 55.35,
            "phys_mem_used": "47G",
            "phys_mem_unused": "381M",
            "memory_free_pct": 25.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138115.980188,
            "unit": "mac_mini",
            "load1": 6.17,
            "load5": 5.97,
            "load15": 5.09,
            "cpu_user_pct": 16.66,
            "cpu_sys_pct": 18.37,
            "cpu_idle_pct": 64.95,
            "cpu_busy_pct": 35.03,
            "phys_mem_used": "47G",
            "phys_mem_unused": "542M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138119.243009,
            "unit": "mac_mini",
            "load1": 6.17,
            "load5": 5.97,
            "load15": 5.09,
            "cpu_user_pct": 13.98,
            "cpu_sys_pct": 13.55,
            "cpu_idle_pct": 72.45,
            "cpu_busy_pct": 27.53,
            "phys_mem_used": "47G",
            "phys_mem_unused": "679M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138122.50252,
            "unit": "mac_mini",
            "load1": 5.99,
            "load5": 5.94,
            "load15": 5.08,
            "cpu_user_pct": 20.94,
            "cpu_sys_pct": 17.9,
            "cpu_idle_pct": 61.96,
            "cpu_busy_pct": 38.84,
            "phys_mem_used": "47G",
            "phys_mem_unused": "575M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138125.757059,
            "unit": "mac_mini",
            "load1": 5.91,
            "load5": 5.93,
            "load15": 5.08,
            "cpu_user_pct": 30.21,
            "cpu_sys_pct": 14.46,
            "cpu_idle_pct": 55.31,
            "cpu_busy_pct": 44.67,
            "phys_mem_used": "47G",
            "phys_mem_unused": "525M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138127.490537,
            "unit": "mac_mini",
            "load1": 5.91,
            "load5": 5.93,
            "load15": 5.08,
            "cpu_user_pct": 14.6,
            "cpu_sys_pct": 10.17,
            "cpu_idle_pct": 75.22,
            "cpu_busy_pct": 24.77,
            "phys_mem_used": "47G",
            "phys_mem_unused": "485M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          }
        ]
      },
      {
        "summary": {
          "concurrency": 4,
          "total_requests": 16,
          "request_count": 16,
          "success_count": 16,
          "error_count": 0,
          "error_rate": 0.0,
          "wall_s": 60.43576420901809,
          "throughput_rps": 0.26474390138699555,
          "latency_avg_s": 13.394817190190224,
          "latency_p50_s": 14.2997927495162,
          "latency_p95_s": 26.077366625278955,
          "latency_max_s": 28.597050375014078,
          "completion_tps": 18.267329195702693,
          "sample_errors": [],
          "peak_cpu_busy_pct": 44.67,
          "peak_load1": 5.91,
          "min_memory_free_pct": 15.0,
          "thermal_warning_seen": false,
          "performance_warning_seen": false,
          "thermal_status_peak": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded"
        },
        "requests": [
          {
            "request_id": "mac_mini-4-3",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138129.938297,
            "latency_s": 2.598843334009871,
            "finish_reason": "length",
            "content_preview": "- **Batch requests**: Group incoming messages and process them in small batches to amortize model load and reduce per\u2011re",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-4-2",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138129.938195,
            "latency_s": 11.972800584044307,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-4-1",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138129.937988,
            "latency_s": 13.661103916994762,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-4-4",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138129.9384372,
            "latency_s": 14.383132916002069,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-4-7",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138143.5993018,
            "latency_s": 1.426015458011534,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-4-5",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138132.5373938,
            "latency_s": 13.13872408302268,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They want three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-4-8",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138144.321704,
            "latency_s": 2.9520862499484792,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They need three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-4-6",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138141.911144,
            "latency_s": 17.468932166986633,
            "finish_reason": "length",
            "content_preview": "- **Batch requests**: Group incoming messages into small batches before sending to the model; reduces per\u2011request overhe",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-4-10",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138145.676238,
            "latency_s": 15.055667916953098,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-4-9",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138145.025386,
            "latency_s": 17.544973791984376,
            "finish_reason": "length",
            "content_preview": "We need to give three short operational tips, concise bullet points. Provide short operational tips for keeping chat inf",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-4-12",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138159.380322,
            "latency_s": 5.734985000046436,
            "finish_reason": "length",
            "content_preview": "- **Batch requests**: Group incoming messages and process them in small, fixed-size batches to amortize overhead.  \n- **",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-4-13",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138160.7320619,
            "latency_s": 14.216452583030332,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          },
          {
            "request_id": "mac_mini-4-11",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138147.273941,
            "latency_s": 28.597050375014078,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster and wants three short operational tip",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-4-14",
            "model": "qwen3:30b",
            "ok": true,
            "started_at": 1778138162.5705862,
            "latency_s": 14.904496999981347,
            "finish_reason": "length",
            "content_preview": "Hmm, the user is testing concurrent chat capacity for a small AI inference cluster. They need three short operational ti",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "mac_mini-4-15",
            "model": "nemotron-3-nano:30b",
            "ok": true,
            "started_at": 1778138165.115391,
            "latency_s": 25.237472042033914,
            "finish_reason": "length",
            "content_preview": "- **Batch requests**: Group incoming messages and process them in small batches to amortize model loading overhead.  \n- ",
            "prompt_tokens": 83,
            "completion_tokens": 96,
            "total_tokens": 179
          },
          {
            "request_id": "mac_mini-4-16",
            "model": "qwen2.5:7b",
            "ok": true,
            "started_at": 1778138174.948769,
            "latency_s": 15.424337624979671,
            "finish_reason": "stop",
            "content_preview": "- Optimize model efficiency with quantization.\n- Implement load balancing across nodes.\n- Use caching for frequent queri",
            "prompt_tokens": 77,
            "completion_tokens": 24,
            "total_tokens": 101
          }
        ],
        "samples": [
          {
            "timestamp": 1778138129.732574,
            "unit": "mac_mini",
            "load1": 5.91,
            "load5": 5.93,
            "load15": 5.08,
            "cpu_user_pct": 30.41,
            "cpu_sys_pct": 11.2,
            "cpu_idle_pct": 58.55,
            "cpu_busy_pct": 41.61,
            "phys_mem_used": "47G",
            "phys_mem_unused": "391M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138133.000232,
            "unit": "mac_mini",
            "load1": 5.68,
            "load5": 5.88,
            "load15": 5.07,
            "cpu_user_pct": 26.38,
            "cpu_sys_pct": 18.29,
            "cpu_idle_pct": 55.31,
            "cpu_busy_pct": 44.67,
            "phys_mem_used": "42G",
            "phys_mem_unused": "5734M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138136.245624,
            "unit": "mac_mini",
            "load1": 5.55,
            "load5": 5.85,
            "load15": 5.06,
            "cpu_user_pct": 24.12,
            "cpu_sys_pct": 18.85,
            "cpu_idle_pct": 57.1,
            "cpu_busy_pct": 42.97,
            "phys_mem_used": "45G",
            "phys_mem_unused": "2907M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138139.499784,
            "unit": "mac_mini",
            "load1": 5.55,
            "load5": 5.85,
            "load15": 5.06,
            "cpu_user_pct": 20.6,
            "cpu_sys_pct": 15.87,
            "cpu_idle_pct": 63.51,
            "cpu_busy_pct": 36.47,
            "phys_mem_used": "47G",
            "phys_mem_unused": "620M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138142.749534,
            "unit": "mac_mini",
            "load1": 5.5,
            "load5": 5.83,
            "load15": 5.06,
            "cpu_user_pct": 16.94,
            "cpu_sys_pct": 12.28,
            "cpu_idle_pct": 70.76,
            "cpu_busy_pct": 29.22,
            "phys_mem_used": "47G",
            "phys_mem_unused": "169M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138146.008429,
            "unit": "mac_mini",
            "load1": 5.54,
            "load5": 5.83,
            "load15": 5.07,
            "cpu_user_pct": 23.72,
            "cpu_sys_pct": 11.86,
            "cpu_idle_pct": 64.4,
            "cpu_busy_pct": 35.58,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1611M",
            "memory_free_pct": 37.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138149.252915,
            "unit": "mac_mini",
            "load1": 5.54,
            "load5": 5.83,
            "load15": 5.07,
            "cpu_user_pct": 19.24,
            "cpu_sys_pct": 17.57,
            "cpu_idle_pct": 63.17,
            "cpu_busy_pct": 36.81,
            "phys_mem_used": "47G",
            "phys_mem_unused": "674M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138152.5054731,
            "unit": "mac_mini",
            "load1": 5.5,
            "load5": 5.82,
            "load15": 5.07,
            "cpu_user_pct": 19.32,
            "cpu_sys_pct": 18.48,
            "cpu_idle_pct": 62.18,
            "cpu_busy_pct": 37.8,
            "phys_mem_used": "47G",
            "phys_mem_unused": "585M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138155.7661939,
            "unit": "mac_mini",
            "load1": 5.54,
            "load5": 5.82,
            "load15": 5.07,
            "cpu_user_pct": 18.2,
            "cpu_sys_pct": 16.3,
            "cpu_idle_pct": 65.66,
            "cpu_busy_pct": 34.5,
            "phys_mem_used": "47G",
            "phys_mem_unused": "562M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138159.020288,
            "unit": "mac_mini",
            "load1": 5.54,
            "load5": 5.82,
            "load15": 5.07,
            "cpu_user_pct": 22.36,
            "cpu_sys_pct": 18.98,
            "cpu_idle_pct": 58.64,
            "cpu_busy_pct": 41.34,
            "phys_mem_used": "47G",
            "phys_mem_unused": "225M",
            "memory_free_pct": 17.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138162.278084,
            "unit": "mac_mini",
            "load1": 5.17,
            "load5": 5.74,
            "load15": 5.05,
            "cpu_user_pct": 13.27,
            "cpu_sys_pct": 16.59,
            "cpu_idle_pct": 70.12,
            "cpu_busy_pct": 29.86,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1580M",
            "memory_free_pct": 26.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138165.534474,
            "unit": "mac_mini",
            "load1": 5.17,
            "load5": 5.74,
            "load15": 5.05,
            "cpu_user_pct": 13.86,
            "cpu_sys_pct": 20.58,
            "cpu_idle_pct": 65.54,
            "cpu_busy_pct": 34.44,
            "phys_mem_used": "41G",
            "phys_mem_unused": "6341M",
            "memory_free_pct": 38.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138168.77696,
            "unit": "mac_mini",
            "load1": 5.16,
            "load5": 5.73,
            "load15": 5.05,
            "cpu_user_pct": 10.37,
            "cpu_sys_pct": 17.84,
            "cpu_idle_pct": 71.78,
            "cpu_busy_pct": 28.21,
            "phys_mem_used": "43G",
            "phys_mem_unused": "4280M",
            "memory_free_pct": 38.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138172.047947,
            "unit": "mac_mini",
            "load1": 4.83,
            "load5": 5.65,
            "load15": 5.03,
            "cpu_user_pct": 12.39,
            "cpu_sys_pct": 17.76,
            "cpu_idle_pct": 69.83,
            "cpu_busy_pct": 30.150000000000002,
            "phys_mem_used": "45G",
            "phys_mem_unused": "2193M",
            "memory_free_pct": 38.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138175.318881,
            "unit": "mac_mini",
            "load1": 4.83,
            "load5": 5.65,
            "load15": 5.03,
            "cpu_user_pct": 14.63,
            "cpu_sys_pct": 15.44,
            "cpu_idle_pct": 69.91,
            "cpu_busy_pct": 30.07,
            "phys_mem_used": "46G",
            "phys_mem_unused": "1712M",
            "memory_free_pct": 38.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138178.582783,
            "unit": "mac_mini",
            "load1": 5.48,
            "load5": 5.77,
            "load15": 5.07,
            "cpu_user_pct": 9.34,
            "cpu_sys_pct": 17.47,
            "cpu_idle_pct": 73.17,
            "cpu_busy_pct": 26.81,
            "phys_mem_used": "47G",
            "phys_mem_unused": "572M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138181.853915,
            "unit": "mac_mini",
            "load1": 5.52,
            "load5": 5.78,
            "load15": 5.08,
            "cpu_user_pct": 12.89,
            "cpu_sys_pct": 19.92,
            "cpu_idle_pct": 67.18,
            "cpu_busy_pct": 32.81,
            "phys_mem_used": "47G",
            "phys_mem_unused": "653M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138185.1285539,
            "unit": "mac_mini",
            "load1": 5.52,
            "load5": 5.78,
            "load15": 5.08,
            "cpu_user_pct": 8.19,
            "cpu_sys_pct": 19.67,
            "cpu_idle_pct": 72.13,
            "cpu_busy_pct": 27.86,
            "phys_mem_used": "47G",
            "phys_mem_unused": "655M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138188.3986409,
            "unit": "mac_mini",
            "load1": 5.32,
            "load5": 5.73,
            "load15": 5.07,
            "cpu_user_pct": 11.15,
            "cpu_sys_pct": 13.63,
            "cpu_idle_pct": 75.2,
            "cpu_busy_pct": 24.78,
            "phys_mem_used": "47G",
            "phys_mem_unused": "485M",
            "memory_free_pct": 27.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          },
          {
            "timestamp": 1778138190.578867,
            "unit": "mac_mini",
            "load1": 5.32,
            "load5": 5.73,
            "load15": 5.07,
            "cpu_user_pct": 13.33,
            "cpu_sys_pct": 14.66,
            "cpu_idle_pct": 72.0,
            "cpu_busy_pct": 27.990000000000002,
            "phys_mem_used": "47G",
            "phys_mem_unused": "139M",
            "memory_free_pct": 15.0,
            "thermal_status": "Note: No thermal warning level has been recorded\nNote: No performance warning level has been recorded\nNote: No CPU power status has been recorded",
            "thermal_warning": 0,
            "performance_warning": 0
          }
        ]
      }
    ],
    "baseline_p50_s": 12.374412957986351
  },
  "spark": {
    "warm_results": [
      {
        "request_id": "warm-spark-1",
        "model": "llava:latest",
        "ok": true,
        "started_at": 1778138874.220337,
        "latency_s": 3.691803375026211,
        "finish_reason": "stop",
        "content_preview": " 1. Optimize model size: Use smaller models that can be inferred quickly to reduce latency.\n2. Implement efficient batch",
        "prompt_tokens": 75,
        "completion_tokens": 73,
        "total_tokens": 148
      },
      {
        "request_id": "warm-spark-2",
        "model": "nemotron-mini:latest",
        "ok": true,
        "started_at": 1778138877.912492,
        "latency_s": 3.0711935000144877,
        "finish_reason": "stop",
        "content_preview": " * Use efficient algorithms: Implement optimized inference algorithms to reduce latency and improve throughput.\n* Optimi",
        "prompt_tokens": 76,
        "completion_tokens": 86,
        "total_tokens": 162
      },
      {
        "request_id": "warm-spark-3",
        "model": "nemotron:latest",
        "ok": true,
        "started_at": 1778138880.98399,
        "latency_s": 20.914444917056244,
        "finish_reason": "length",
        "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
        "prompt_tokens": 79,
        "completion_tokens": 96,
        "total_tokens": 175
      },
      {
        "request_id": "warm-spark-4",
        "model": "qwen3:8b",
        "ok": true,
        "started_at": 1778138901.898768,
        "latency_s": 2.504172917047981,
        "finish_reason": "length",
        "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
        "prompt_tokens": 79,
        "completion_tokens": 96,
        "total_tokens": 175
      }
    ],
    "steps": [
      {
        "summary": {
          "concurrency": 1,
          "total_requests": 12,
          "request_count": 12,
          "success_count": 12,
          "error_count": 0,
          "error_rate": 0.0,
          "wall_s": 87.31058462499641,
          "throughput_rps": 0.13744038081454427,
          "latency_avg_s": 7.275378760251139,
          "latency_p50_s": 2.787085541029228,
          "latency_p95_s": 21.0708027437533,
          "latency_max_s": 21.083474125014618,
          "completion_tps": 10.663082878195059,
          "sample_errors": [],
          "peak_gpu_temp_c": 63.0,
          "peak_gpu_util_pct": 96.0,
          "peak_gpu_power_w": 49.61,
          "peak_load1": 1.23,
          "peak_mem_used_mb": 107560
        },
        "requests": [
          {
            "request_id": "spark-1-1",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778138904.615563,
            "latency_s": 2.818743791023735,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the model architecture to reduce latency and increase throughput.\n2. Use efficient data structures and algo",
            "prompt_tokens": 75,
            "completion_tokens": 50,
            "total_tokens": 125
          },
          {
            "request_id": "spark-1-2",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778138907.434452,
            "latency_s": 2.8729636250063777,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized machine learning models that can handle high volumes of requests effici",
            "prompt_tokens": 76,
            "completion_tokens": 79,
            "total_tokens": 155
          },
          {
            "request_id": "spark-1-3",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778138910.307821,
            "latency_s": 20.997349124983884,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-1-4",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778138931.305357,
            "latency_s": 2.5408079589833505,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-1-5",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778138933.846577,
            "latency_s": 2.6527292910031974,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size to reduce latency\n2. Use efficient data structures for storing and processing chat messages\n3. I",
            "prompt_tokens": 75,
            "completion_tokens": 44,
            "total_tokens": 119
          },
          {
            "request_id": "spark-1-6",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778138936.499698,
            "latency_s": 2.8814695000182837,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized machine learning models to reduce inference time, such as using model p",
            "prompt_tokens": 76,
            "completion_tokens": 76,
            "total_tokens": 152
          },
          {
            "request_id": "spark-1-7",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778138939.381391,
            "latency_s": 21.060435249994043,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-1-8",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778138960.4421172,
            "latency_s": 2.488563166989479,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-1-9",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778138962.9308228,
            "latency_s": 2.648017457977403,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size to reduce latency.\n2. Use efficient data structures and algorithms.\n3. Implement load balancing ",
            "prompt_tokens": 75,
            "completion_tokens": 41,
            "total_tokens": 116
          },
          {
            "request_id": "spark-1-10",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778138965.5792792,
            "latency_s": 2.755427291034721,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference algorithms to minimize latency and improve throughput.\n* Para",
            "prompt_tokens": 76,
            "completion_tokens": 65,
            "total_tokens": 141
          },
          {
            "request_id": "spark-1-11",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778138968.335098,
            "latency_s": 21.083474125014618,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-1-12",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778138989.419244,
            "latency_s": 2.5045645409845747,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          }
        ],
        "samples": [
          {
            "timestamp": 1778138904.4032729,
            "unit": "spark",
            "load1": 0.7,
            "load5": 0.9,
            "load15": 0.66,
            "mem_total_mb": 124610,
            "mem_used_mb": 97963,
            "mem_free_mb": 12398,
            "mem_available_mb": 26647,
            "gpu_temp_c": 52.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 32.03
          },
          {
            "timestamp": 1778138907.936119,
            "unit": "spark",
            "load1": 0.96,
            "load5": 0.95,
            "load15": 0.68,
            "mem_total_mb": 124610,
            "mem_used_mb": 98394,
            "mem_free_mb": 11960,
            "mem_available_mb": 26216,
            "gpu_temp_c": 51.0,
            "gpu_util_pct": 9.0,
            "gpu_power_w": 17.49
          },
          {
            "timestamp": 1778138911.366452,
            "unit": "spark",
            "load1": 0.96,
            "load5": 0.95,
            "load15": 0.68,
            "mem_total_mb": 124610,
            "mem_used_mb": 101805,
            "mem_free_mb": 8549,
            "mem_available_mb": 22805,
            "gpu_temp_c": 55.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 43.79
          },
          {
            "timestamp": 1778138914.822352,
            "unit": "spark",
            "load1": 0.97,
            "load5": 0.96,
            "load15": 0.68,
            "mem_total_mb": 124610,
            "mem_used_mb": 101798,
            "mem_free_mb": 8556,
            "mem_available_mb": 22812,
            "gpu_temp_c": 56.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 43.98
          },
          {
            "timestamp": 1778138918.25027,
            "unit": "spark",
            "load1": 0.97,
            "load5": 0.96,
            "load15": 0.68,
            "mem_total_mb": 124610,
            "mem_used_mb": 101799,
            "mem_free_mb": 8554,
            "mem_available_mb": 22810,
            "gpu_temp_c": 57.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.08
          },
          {
            "timestamp": 1778138921.7207701,
            "unit": "spark",
            "load1": 0.97,
            "load5": 0.96,
            "load15": 0.68,
            "mem_total_mb": 124610,
            "mem_used_mb": 101805,
            "mem_free_mb": 8540,
            "mem_available_mb": 22805,
            "gpu_temp_c": 57.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.18
          },
          {
            "timestamp": 1778138925.520714,
            "unit": "spark",
            "load1": 1.05,
            "load5": 0.97,
            "load15": 0.69,
            "mem_total_mb": 124610,
            "mem_used_mb": 101808,
            "mem_free_mb": 8537,
            "mem_available_mb": 22802,
            "gpu_temp_c": 58.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.41
          },
          {
            "timestamp": 1778138928.954087,
            "unit": "spark",
            "load1": 1.05,
            "load5": 0.97,
            "load15": 0.69,
            "mem_total_mb": 124610,
            "mem_used_mb": 101816,
            "mem_free_mb": 8528,
            "mem_available_mb": 22794,
            "gpu_temp_c": 58.0,
            "gpu_util_pct": 84.0,
            "gpu_power_w": 43.21
          },
          {
            "timestamp": 1778138932.432585,
            "unit": "spark",
            "load1": 1.05,
            "load5": 0.97,
            "load15": 0.69,
            "mem_total_mb": 124610,
            "mem_used_mb": 101813,
            "mem_free_mb": 8530,
            "mem_available_mb": 22797,
            "gpu_temp_c": 59.0,
            "gpu_util_pct": 95.0,
            "gpu_power_w": 48.67
          },
          {
            "timestamp": 1778138935.9148521,
            "unit": "spark",
            "load1": 1.2,
            "load5": 1.01,
            "load15": 0.7,
            "mem_total_mb": 124610,
            "mem_used_mb": 107560,
            "mem_free_mb": 2782,
            "mem_available_mb": 17049,
            "gpu_temp_c": 58.0,
            "gpu_util_pct": 55.0,
            "gpu_power_w": 36.63
          },
          {
            "timestamp": 1778138939.3680398,
            "unit": "spark",
            "load1": 1.11,
            "load5": 0.99,
            "load15": 0.7,
            "mem_total_mb": 124610,
            "mem_used_mb": 101830,
            "mem_free_mb": 8512,
            "mem_available_mb": 22779,
            "gpu_temp_c": 58.0,
            "gpu_util_pct": 89.0,
            "gpu_power_w": 42.22
          },
          {
            "timestamp": 1778138942.768329,
            "unit": "spark",
            "load1": 1.11,
            "load5": 0.99,
            "load15": 0.7,
            "mem_total_mb": 124610,
            "mem_used_mb": 101821,
            "mem_free_mb": 8522,
            "mem_available_mb": 22789,
            "gpu_temp_c": 59.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.62
          },
          {
            "timestamp": 1778138946.205833,
            "unit": "spark",
            "load1": 1.18,
            "load5": 1.01,
            "load15": 0.71,
            "mem_total_mb": 124610,
            "mem_used_mb": 101813,
            "mem_free_mb": 8530,
            "mem_available_mb": 22797,
            "gpu_temp_c": 59.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.57
          },
          {
            "timestamp": 1778138949.623188,
            "unit": "spark",
            "load1": 1.16,
            "load5": 1.01,
            "load15": 0.71,
            "mem_total_mb": 124610,
            "mem_used_mb": 101828,
            "mem_free_mb": 8515,
            "mem_available_mb": 22782,
            "gpu_temp_c": 60.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.74
          },
          {
            "timestamp": 1778138953.121953,
            "unit": "spark",
            "load1": 1.23,
            "load5": 1.02,
            "load15": 0.71,
            "mem_total_mb": 124610,
            "mem_used_mb": 101824,
            "mem_free_mb": 8518,
            "mem_available_mb": 22786,
            "gpu_temp_c": 60.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.71
          },
          {
            "timestamp": 1778138956.63794,
            "unit": "spark",
            "load1": 1.23,
            "load5": 1.02,
            "load15": 0.71,
            "mem_total_mb": 124610,
            "mem_used_mb": 101816,
            "mem_free_mb": 8526,
            "mem_available_mb": 22794,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.2
          },
          {
            "timestamp": 1778138960.082371,
            "unit": "spark",
            "load1": 1.21,
            "load5": 1.02,
            "load15": 0.72,
            "mem_total_mb": 124610,
            "mem_used_mb": 101826,
            "mem_free_mb": 8514,
            "mem_available_mb": 22783,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 43.81
          },
          {
            "timestamp": 1778138963.486294,
            "unit": "spark",
            "load1": 1.2,
            "load5": 1.02,
            "load15": 0.72,
            "mem_total_mb": 124610,
            "mem_used_mb": 102323,
            "mem_free_mb": 8018,
            "mem_available_mb": 22287,
            "gpu_temp_c": 57.0,
            "gpu_util_pct": 9.0,
            "gpu_power_w": 19.6
          },
          {
            "timestamp": 1778138966.87781,
            "unit": "spark",
            "load1": 1.2,
            "load5": 1.02,
            "load15": 0.72,
            "mem_total_mb": 124610,
            "mem_used_mb": 101809,
            "mem_free_mb": 8531,
            "mem_available_mb": 22801,
            "gpu_temp_c": 56.0,
            "gpu_util_pct": 6.0,
            "gpu_power_w": 15.29
          },
          {
            "timestamp": 1778138970.226849,
            "unit": "spark",
            "load1": 1.18,
            "load5": 1.02,
            "load15": 0.72,
            "mem_total_mb": 124610,
            "mem_used_mb": 101827,
            "mem_free_mb": 8513,
            "mem_available_mb": 22782,
            "gpu_temp_c": 60.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.04
          },
          {
            "timestamp": 1778138973.681639,
            "unit": "spark",
            "load1": 1.16,
            "load5": 1.02,
            "load15": 0.72,
            "mem_total_mb": 124610,
            "mem_used_mb": 101820,
            "mem_free_mb": 8520,
            "mem_available_mb": 22790,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.0
          },
          {
            "timestamp": 1778138977.0943801,
            "unit": "spark",
            "load1": 1.16,
            "load5": 1.02,
            "load15": 0.72,
            "mem_total_mb": 124610,
            "mem_used_mb": 101822,
            "mem_free_mb": 8519,
            "mem_available_mb": 22788,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.34
          },
          {
            "timestamp": 1778138980.5814888,
            "unit": "spark",
            "load1": 1.15,
            "load5": 1.02,
            "load15": 0.72,
            "mem_total_mb": 124610,
            "mem_used_mb": 101813,
            "mem_free_mb": 8526,
            "mem_available_mb": 22796,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.52
          },
          {
            "timestamp": 1778138983.9977758,
            "unit": "spark",
            "load1": 1.14,
            "load5": 1.02,
            "load15": 0.73,
            "mem_total_mb": 124610,
            "mem_used_mb": 101805,
            "mem_free_mb": 8535,
            "mem_available_mb": 22805,
            "gpu_temp_c": 62.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.44
          },
          {
            "timestamp": 1778138987.709131,
            "unit": "spark",
            "load1": 1.14,
            "load5": 1.02,
            "load15": 0.73,
            "mem_total_mb": 124610,
            "mem_used_mb": 101801,
            "mem_free_mb": 8539,
            "mem_available_mb": 22809,
            "gpu_temp_c": 62.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.61
          },
          {
            "timestamp": 1778138991.180181,
            "unit": "spark",
            "load1": 1.21,
            "load5": 1.04,
            "load15": 0.73,
            "mem_total_mb": 124610,
            "mem_used_mb": 101790,
            "mem_free_mb": 8550,
            "mem_available_mb": 22820,
            "gpu_temp_c": 63.0,
            "gpu_util_pct": 95.0,
            "gpu_power_w": 49.61
          },
          {
            "timestamp": 1778138992.13164,
            "unit": "spark",
            "load1": 1.21,
            "load5": 1.04,
            "load15": 0.73,
            "mem_total_mb": 124610,
            "mem_used_mb": 101788,
            "mem_free_mb": 8552,
            "mem_available_mb": 22822,
            "gpu_temp_c": 58.0,
            "gpu_util_pct": 95.0,
            "gpu_power_w": 32.23
          }
        ]
      },
      {
        "summary": {
          "concurrency": 2,
          "total_requests": 12,
          "request_count": 12,
          "success_count": 12,
          "error_count": 0,
          "error_rate": 0.0,
          "wall_s": 88.69139995798469,
          "throughput_rps": 0.13530060418129264,
          "latency_avg_s": 13.330229774330897,
          "latency_p50_s": 9.189531667012488,
          "latency_p95_s": 30.284897798232848,
          "latency_max_s": 31.567722541978583,
          "completion_tps": 11.737327412727137,
          "sample_errors": [],
          "peak_gpu_temp_c": 66.0,
          "peak_gpu_util_pct": 96.0,
          "peak_gpu_power_w": 46.61,
          "peak_load1": 3.62,
          "peak_mem_used_mb": 107717
        },
        "requests": [
          {
            "request_id": "spark-2-1",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778138994.6842098,
            "latency_s": 3.424049791006837,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size: Use smaller models or prune larger ones to reduce memory usage and latency.\n\n2. Implement effic",
            "prompt_tokens": 75,
            "completion_tokens": 70,
            "total_tokens": 145
          },
          {
            "request_id": "spark-2-2",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778138994.6843672,
            "latency_s": 9.565843084012158,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized machine learning models that can process data quickly, reducing respons",
            "prompt_tokens": 76,
            "completion_tokens": 70,
            "total_tokens": 146
          },
          {
            "request_id": "spark-2-4",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139004.250339,
            "latency_s": 12.213246792030986,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-2-5",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139016.46399,
            "latency_s": 7.811381875013467,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the AI model for efficient inference to reduce latency.\n2. Implement load balancing to distribute incoming ",
            "prompt_tokens": 75,
            "completion_tokens": 52,
            "total_tokens": 127
          },
          {
            "request_id": "spark-2-3",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778138998.1087308,
            "latency_s": 29.235313916986343,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-2-6",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139024.275819,
            "latency_s": 7.897505333006848,
            "finish_reason": "stop",
            "content_preview": " - Use efficient algorithms: Implement optimized inference models that can handle multiple requests simultaneously, such",
            "prompt_tokens": 76,
            "completion_tokens": 93,
            "total_tokens": 169
          },
          {
            "request_id": "spark-2-8",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139032.173782,
            "latency_s": 7.062651874963194,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-2-9",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139039.236971,
            "latency_s": 9.995615582971368,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size: Use smaller models that can be inferred quickly, reducing the latency of responses.\n2. Implemen",
            "prompt_tokens": 75,
            "completion_tokens": 84,
            "total_tokens": 159
          },
          {
            "request_id": "spark-2-10",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139049.2330391,
            "latency_s": 8.813220250012819,
            "finish_reason": "length",
            "content_preview": " * Use efficient algorithms: Implement optimized machine learning models that can handle high volumes of requests quickl",
            "prompt_tokens": 76,
            "completion_tokens": 96,
            "total_tokens": 172
          },
          {
            "request_id": "spark-2-7",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139027.344557,
            "latency_s": 31.567722541978583,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-2-12",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139058.912607,
            "latency_s": 7.0477377499919385,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-2-11",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139058.046526,
            "latency_s": 25.328468499996234,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          }
        ],
        "samples": [
          {
            "timestamp": 1778138994.471599,
            "unit": "spark",
            "load1": 1.27,
            "load5": 1.05,
            "load15": 0.74,
            "mem_total_mb": 124610,
            "mem_used_mb": 101798,
            "mem_free_mb": 8541,
            "mem_available_mb": 22812,
            "gpu_temp_c": 57.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 14.85
          },
          {
            "timestamp": 1778138997.81117,
            "unit": "spark",
            "load1": 1.27,
            "load5": 1.05,
            "load15": 0.74,
            "mem_total_mb": 124610,
            "mem_used_mb": 96084,
            "mem_free_mb": 14257,
            "mem_available_mb": 28525,
            "gpu_temp_c": 60.0,
            "gpu_util_pct": 95.0,
            "gpu_power_w": 38.47
          },
          {
            "timestamp": 1778139001.2433019,
            "unit": "spark",
            "load1": 1.65,
            "load5": 1.14,
            "load15": 0.77,
            "mem_total_mb": 124610,
            "mem_used_mb": 99635,
            "mem_free_mb": 10704,
            "mem_available_mb": 24975,
            "gpu_temp_c": 62.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.12
          },
          {
            "timestamp": 1778139004.6962,
            "unit": "spark",
            "load1": 1.68,
            "load5": 1.15,
            "load15": 0.78,
            "mem_total_mb": 124610,
            "mem_used_mb": 99238,
            "mem_free_mb": 11101,
            "mem_available_mb": 25372,
            "gpu_temp_c": 62.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.09
          },
          {
            "timestamp": 1778139008.319351,
            "unit": "spark",
            "load1": 1.7,
            "load5": 1.17,
            "load15": 0.78,
            "mem_total_mb": 124610,
            "mem_used_mb": 101711,
            "mem_free_mb": 8920,
            "mem_available_mb": 22899,
            "gpu_temp_c": 62.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.0
          },
          {
            "timestamp": 1778139011.857145,
            "unit": "spark",
            "load1": 1.7,
            "load5": 1.17,
            "load15": 0.78,
            "mem_total_mb": 124610,
            "mem_used_mb": 101960,
            "mem_free_mb": 8671,
            "mem_available_mb": 22650,
            "gpu_temp_c": 63.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.16
          },
          {
            "timestamp": 1778139015.2675672,
            "unit": "spark",
            "load1": 1.73,
            "load5": 1.18,
            "load15": 0.79,
            "mem_total_mb": 124610,
            "mem_used_mb": 101988,
            "mem_free_mb": 8643,
            "mem_available_mb": 22622,
            "gpu_temp_c": 63.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.32
          },
          {
            "timestamp": 1778139018.767255,
            "unit": "spark",
            "load1": 1.83,
            "load5": 1.21,
            "load15": 0.8,
            "mem_total_mb": 124610,
            "mem_used_mb": 102461,
            "mem_free_mb": 8170,
            "mem_available_mb": 22149,
            "gpu_temp_c": 63.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.07
          },
          {
            "timestamp": 1778139022.18968,
            "unit": "spark",
            "load1": 1.83,
            "load5": 1.21,
            "load15": 0.8,
            "mem_total_mb": 124610,
            "mem_used_mb": 107717,
            "mem_free_mb": 2913,
            "mem_available_mb": 16892,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.07
          },
          {
            "timestamp": 1778139025.565114,
            "unit": "spark",
            "load1": 1.92,
            "load5": 1.24,
            "load15": 0.81,
            "mem_total_mb": 124610,
            "mem_used_mb": 98357,
            "mem_free_mb": 12273,
            "mem_available_mb": 26253,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.38
          },
          {
            "timestamp": 1778139029.001819,
            "unit": "spark",
            "load1": 1.93,
            "load5": 1.25,
            "load15": 0.82,
            "mem_total_mb": 124610,
            "mem_used_mb": 101989,
            "mem_free_mb": 8641,
            "mem_available_mb": 22621,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.03
          },
          {
            "timestamp": 1778139032.405018,
            "unit": "spark",
            "load1": 1.93,
            "load5": 1.25,
            "load15": 0.82,
            "mem_total_mb": 124610,
            "mem_used_mb": 101962,
            "mem_free_mb": 8667,
            "mem_available_mb": 22647,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.58
          },
          {
            "timestamp": 1778139035.8169868,
            "unit": "spark",
            "load1": 3.62,
            "load5": 1.61,
            "load15": 0.94,
            "mem_total_mb": 124610,
            "mem_used_mb": 101940,
            "mem_free_mb": 8667,
            "mem_available_mb": 22669,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.72
          },
          {
            "timestamp": 1778139039.242545,
            "unit": "spark",
            "load1": 3.57,
            "load5": 1.64,
            "load15": 0.95,
            "mem_total_mb": 124610,
            "mem_used_mb": 101664,
            "mem_free_mb": 8943,
            "mem_available_mb": 22946,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.52
          },
          {
            "timestamp": 1778139042.944156,
            "unit": "spark",
            "load1": 3.6,
            "load5": 1.68,
            "load15": 0.97,
            "mem_total_mb": 124610,
            "mem_used_mb": 107547,
            "mem_free_mb": 3353,
            "mem_available_mb": 17063,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.92
          },
          {
            "timestamp": 1778139046.395947,
            "unit": "spark",
            "load1": 3.6,
            "load5": 1.68,
            "load15": 0.97,
            "mem_total_mb": 124610,
            "mem_used_mb": 107660,
            "mem_free_mb": 3233,
            "mem_available_mb": 16950,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.54
          },
          {
            "timestamp": 1778139050.220781,
            "unit": "spark",
            "load1": 3.47,
            "load5": 1.68,
            "load15": 0.97,
            "mem_total_mb": 124610,
            "mem_used_mb": 98153,
            "mem_free_mb": 12740,
            "mem_available_mb": 26457,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.42
          },
          {
            "timestamp": 1778139053.622127,
            "unit": "spark",
            "load1": 3.36,
            "load5": 1.69,
            "load15": 0.98,
            "mem_total_mb": 124610,
            "mem_used_mb": 101939,
            "mem_free_mb": 8953,
            "mem_available_mb": 22671,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.05
          },
          {
            "timestamp": 1778139057.034428,
            "unit": "spark",
            "load1": 3.36,
            "load5": 1.69,
            "load15": 0.98,
            "mem_total_mb": 124610,
            "mem_used_mb": 101940,
            "mem_free_mb": 8952,
            "mem_available_mb": 22670,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.49
          },
          {
            "timestamp": 1778139060.525919,
            "unit": "spark",
            "load1": 3.25,
            "load5": 1.69,
            "load15": 0.99,
            "mem_total_mb": 124610,
            "mem_used_mb": 101936,
            "mem_free_mb": 8956,
            "mem_available_mb": 22673,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.1
          },
          {
            "timestamp": 1778139063.95607,
            "unit": "spark",
            "load1": 3.15,
            "load5": 1.7,
            "load15": 0.99,
            "mem_total_mb": 124610,
            "mem_used_mb": 101945,
            "mem_free_mb": 8948,
            "mem_available_mb": 22665,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.04
          },
          {
            "timestamp": 1778139067.3620028,
            "unit": "spark",
            "load1": 3.15,
            "load5": 1.7,
            "load15": 0.99,
            "mem_total_mb": 124610,
            "mem_used_mb": 101937,
            "mem_free_mb": 8956,
            "mem_available_mb": 22673,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.61
          },
          {
            "timestamp": 1778139070.7901728,
            "unit": "spark",
            "load1": 3.06,
            "load5": 1.7,
            "load15": 1.0,
            "mem_total_mb": 124610,
            "mem_used_mb": 101956,
            "mem_free_mb": 8936,
            "mem_available_mb": 22654,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.46
          },
          {
            "timestamp": 1778139074.280235,
            "unit": "spark",
            "load1": 2.89,
            "load5": 1.69,
            "load15": 1.0,
            "mem_total_mb": 124610,
            "mem_used_mb": 101940,
            "mem_free_mb": 8952,
            "mem_available_mb": 22670,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.49
          },
          {
            "timestamp": 1778139077.760362,
            "unit": "spark",
            "load1": 2.89,
            "load5": 1.69,
            "load15": 1.0,
            "mem_total_mb": 124610,
            "mem_used_mb": 101949,
            "mem_free_mb": 8943,
            "mem_available_mb": 22661,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.42
          },
          {
            "timestamp": 1778139081.23623,
            "unit": "spark",
            "load1": 2.98,
            "load5": 1.73,
            "load15": 1.01,
            "mem_total_mb": 124610,
            "mem_used_mb": 101945,
            "mem_free_mb": 8946,
            "mem_available_mb": 22665,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.41
          },
          {
            "timestamp": 1778139083.581696,
            "unit": "spark",
            "load1": 2.82,
            "load5": 1.72,
            "load15": 1.01,
            "mem_total_mb": 124610,
            "mem_used_mb": 101935,
            "mem_free_mb": 8956,
            "mem_available_mb": 22675,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 30.09
          }
        ]
      },
      {
        "summary": {
          "concurrency": 4,
          "total_requests": 16,
          "request_count": 16,
          "success_count": 16,
          "error_count": 0,
          "error_rate": 0.0,
          "wall_s": 115.54268729197793,
          "throughput_rps": 0.138476959252019,
          "latency_avg_s": 22.310767362047045,
          "latency_p50_s": 11.786476020992268,
          "latency_p95_s": 59.26891944797535,
          "latency_max_s": 62.25069016701309,
          "completion_tps": 11.926328115580136,
          "sample_errors": [],
          "peak_gpu_temp_c": 65.0,
          "peak_gpu_util_pct": 96.0,
          "peak_gpu_power_w": 48.01,
          "peak_load1": 3.17,
          "peak_mem_used_mb": 107654
        },
        "requests": [
          {
            "request_id": "spark-4-4",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139086.158352,
            "latency_s": 8.299695915949997,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-4-1",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139086.157821,
            "latency_s": 11.126016874972265,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size: Use smaller models that can be loaded faster, reducing the latency of responses.\n2. Implement l",
            "prompt_tokens": 75,
            "completion_tokens": 84,
            "total_tokens": 159
          },
          {
            "request_id": "spark-4-5",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139094.458316,
            "latency_s": 7.248635499970987,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the model: Use efficient models that are optimized for inference to reduce latency.\n2. Load balance: Distri",
            "prompt_tokens": 75,
            "completion_tokens": 74,
            "total_tokens": 149
          },
          {
            "request_id": "spark-4-2",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139086.158036,
            "latency_s": 22.299941167002544,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference algorithms to minimize latency and improve throughput.\n* Para",
            "prompt_tokens": 76,
            "completion_tokens": 62,
            "total_tokens": 138
          },
          {
            "request_id": "spark-4-6",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139097.28419,
            "latency_s": 16.676338125020266,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference models that minimize latency and improve throughput, such as ",
            "prompt_tokens": 76,
            "completion_tokens": 87,
            "total_tokens": 163
          },
          {
            "request_id": "spark-4-8",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139108.4585228,
            "latency_s": 8.527736791991629,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-4-9",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139113.96084,
            "latency_s": 8.497868999955244,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the model architecture to reduce latency and increase throughput.\n2. Use efficient data structures and algo",
            "prompt_tokens": 75,
            "completion_tokens": 55,
            "total_tokens": 130
          },
          {
            "request_id": "spark-4-3",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139086.158185,
            "latency_s": 38.995147667010315,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-4-10",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139116.9865868,
            "latency_s": 11.26212025003042,
            "finish_reason": "length",
            "content_preview": " * Use efficient algorithms: Implement optimized machine learning models that can handle high volumes of requests effici",
            "prompt_tokens": 76,
            "completion_tokens": 96,
            "total_tokens": 172
          },
          {
            "request_id": "spark-4-12",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139125.153885,
            "latency_s": 13.02645212499192,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-4-13",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139128.249259,
            "latency_s": 11.19887274998473,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size to reduce memory usage and improve inference speed.\n2. Use efficient data structures and algorit",
            "prompt_tokens": 75,
            "completion_tokens": 56,
            "total_tokens": 131
          },
          {
            "request_id": "spark-4-14",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139138.180649,
            "latency_s": 9.434529458987527,
            "finish_reason": "length",
            "content_preview": " * Use efficient algorithms: Implement optimized machine learning models that can handle high volumes of requests quickl",
            "prompt_tokens": 76,
            "completion_tokens": 96,
            "total_tokens": 172
          },
          {
            "request_id": "spark-4-7",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139101.707396,
            "latency_s": 57.5424043329549,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across multiple AI nodes\n\t+ U",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-4-16",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139147.615691,
            "latency_s": 12.310831791954115,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-4-11",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139122.459011,
            "latency_s": 58.27499587496277,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balance**: \n\t+ Distribute incoming chats across multiple AI nodes\n\t+ Us",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-4-15",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139139.448576,
            "latency_s": 62.25069016701309,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          }
        ],
        "samples": [
          {
            "timestamp": 1778139085.9472241,
            "unit": "spark",
            "load1": 2.82,
            "load5": 1.72,
            "load15": 1.01,
            "mem_total_mb": 124610,
            "mem_used_mb": 101930,
            "mem_free_mb": 8961,
            "mem_available_mb": 22679,
            "gpu_temp_c": 59.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 15.22
          },
          {
            "timestamp": 1778139089.2797298,
            "unit": "spark",
            "load1": 2.84,
            "load5": 1.74,
            "load15": 1.02,
            "mem_total_mb": 124610,
            "mem_used_mb": 106868,
            "mem_free_mb": 4316,
            "mem_available_mb": 17742,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.1
          },
          {
            "timestamp": 1778139092.773406,
            "unit": "spark",
            "load1": 2.84,
            "load5": 1.74,
            "load15": 1.02,
            "mem_total_mb": 124610,
            "mem_used_mb": 107642,
            "mem_free_mb": 3541,
            "mem_available_mb": 16968,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.83
          },
          {
            "timestamp": 1778139096.217921,
            "unit": "spark",
            "load1": 3.17,
            "load5": 1.83,
            "load15": 1.06,
            "mem_total_mb": 124610,
            "mem_used_mb": 107648,
            "mem_free_mb": 3535,
            "mem_available_mb": 16962,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.19
          },
          {
            "timestamp": 1778139099.676312,
            "unit": "spark",
            "load1": 3.08,
            "load5": 1.83,
            "load15": 1.06,
            "mem_total_mb": 124610,
            "mem_used_mb": 107654,
            "mem_free_mb": 3529,
            "mem_available_mb": 16956,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.24
          },
          {
            "timestamp": 1778139103.10156,
            "unit": "spark",
            "load1": 2.99,
            "load5": 1.83,
            "load15": 1.07,
            "mem_total_mb": 124610,
            "mem_used_mb": 98369,
            "mem_free_mb": 12813,
            "mem_available_mb": 26241,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.61
          },
          {
            "timestamp": 1778139106.5893881,
            "unit": "spark",
            "load1": 2.99,
            "load5": 1.83,
            "load15": 1.07,
            "mem_total_mb": 124610,
            "mem_used_mb": 101989,
            "mem_free_mb": 9192,
            "mem_available_mb": 22620,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.04
          },
          {
            "timestamp": 1778139110.00369,
            "unit": "spark",
            "load1": 2.91,
            "load5": 1.84,
            "load15": 1.07,
            "mem_total_mb": 124610,
            "mem_used_mb": 102023,
            "mem_free_mb": 9158,
            "mem_available_mb": 22587,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.15
          },
          {
            "timestamp": 1778139113.694922,
            "unit": "spark",
            "load1": 2.92,
            "load5": 1.86,
            "load15": 1.08,
            "mem_total_mb": 124610,
            "mem_used_mb": 102011,
            "mem_free_mb": 9170,
            "mem_available_mb": 22599,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.9
          },
          {
            "timestamp": 1778139117.2161,
            "unit": "spark",
            "load1": 2.92,
            "load5": 1.86,
            "load15": 1.08,
            "mem_total_mb": 124610,
            "mem_used_mb": 106841,
            "mem_free_mb": 4632,
            "mem_available_mb": 17768,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.38
          },
          {
            "timestamp": 1778139120.686563,
            "unit": "spark",
            "load1": 2.92,
            "load5": 1.87,
            "load15": 1.09,
            "mem_total_mb": 124610,
            "mem_used_mb": 96099,
            "mem_free_mb": 15374,
            "mem_available_mb": 28511,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 41.82
          },
          {
            "timestamp": 1778139124.15419,
            "unit": "spark",
            "load1": 2.85,
            "load5": 1.88,
            "load15": 1.1,
            "mem_total_mb": 124610,
            "mem_used_mb": 99693,
            "mem_free_mb": 12073,
            "mem_available_mb": 24917,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.33
          },
          {
            "timestamp": 1778139127.6036139,
            "unit": "spark",
            "load1": 2.85,
            "load5": 1.88,
            "load15": 1.1,
            "mem_total_mb": 124610,
            "mem_used_mb": 90845,
            "mem_free_mb": 20920,
            "mem_available_mb": 33765,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.31
          },
          {
            "timestamp": 1778139131.330719,
            "unit": "spark",
            "load1": 3.02,
            "load5": 1.93,
            "load15": 1.12,
            "mem_total_mb": 124610,
            "mem_used_mb": 102543,
            "mem_free_mb": 9510,
            "mem_available_mb": 22067,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.43
          },
          {
            "timestamp": 1778139134.73699,
            "unit": "spark",
            "load1": 3.02,
            "load5": 1.95,
            "load15": 1.13,
            "mem_total_mb": 124610,
            "mem_used_mb": 107595,
            "mem_free_mb": 4679,
            "mem_available_mb": 17015,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.27
          },
          {
            "timestamp": 1778139138.195488,
            "unit": "spark",
            "load1": 3.1,
            "load5": 1.98,
            "load15": 1.15,
            "mem_total_mb": 124610,
            "mem_used_mb": 107647,
            "mem_free_mb": 4626,
            "mem_available_mb": 16963,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.7
          },
          {
            "timestamp": 1778139141.603918,
            "unit": "spark",
            "load1": 3.1,
            "load5": 1.98,
            "load15": 1.15,
            "mem_total_mb": 124610,
            "mem_used_mb": 98583,
            "mem_free_mb": 13816,
            "mem_available_mb": 26027,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 48.01
          },
          {
            "timestamp": 1778139145.07073,
            "unit": "spark",
            "load1": 2.93,
            "load5": 1.96,
            "load15": 1.14,
            "mem_total_mb": 124610,
            "mem_used_mb": 99780,
            "mem_free_mb": 12617,
            "mem_available_mb": 24830,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.35
          },
          {
            "timestamp": 1778139148.484291,
            "unit": "spark",
            "load1": 2.94,
            "load5": 1.98,
            "load15": 1.16,
            "mem_total_mb": 124610,
            "mem_used_mb": 90407,
            "mem_free_mb": 21966,
            "mem_available_mb": 34203,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.18
          },
          {
            "timestamp": 1778139152.030207,
            "unit": "spark",
            "load1": 2.94,
            "load5": 1.98,
            "load15": 1.16,
            "mem_total_mb": 124610,
            "mem_used_mb": 102039,
            "mem_free_mb": 10344,
            "mem_available_mb": 22571,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.42
          },
          {
            "timestamp": 1778139155.637675,
            "unit": "spark",
            "load1": 2.94,
            "load5": 2.0,
            "load15": 1.17,
            "mem_total_mb": 124610,
            "mem_used_mb": 102078,
            "mem_free_mb": 10302,
            "mem_available_mb": 22532,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.85
          },
          {
            "timestamp": 1778139159.058965,
            "unit": "spark",
            "load1": 2.87,
            "load5": 2.0,
            "load15": 1.17,
            "mem_total_mb": 124610,
            "mem_used_mb": 102090,
            "mem_free_mb": 10290,
            "mem_available_mb": 22520,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.92
          },
          {
            "timestamp": 1778139162.499986,
            "unit": "spark",
            "load1": 2.87,
            "load5": 2.0,
            "load15": 1.17,
            "mem_total_mb": 124610,
            "mem_used_mb": 102069,
            "mem_free_mb": 10310,
            "mem_available_mb": 22540,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.83
          },
          {
            "timestamp": 1778139166.0092149,
            "unit": "spark",
            "load1": 3.12,
            "load5": 2.07,
            "load15": 1.2,
            "mem_total_mb": 124610,
            "mem_used_mb": 102076,
            "mem_free_mb": 10302,
            "mem_available_mb": 22533,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.67
          },
          {
            "timestamp": 1778139169.4651039,
            "unit": "spark",
            "load1": 2.95,
            "load5": 2.05,
            "load15": 1.2,
            "mem_total_mb": 124610,
            "mem_used_mb": 102075,
            "mem_free_mb": 10302,
            "mem_available_mb": 22535,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.43
          },
          {
            "timestamp": 1778139172.98998,
            "unit": "spark",
            "load1": 2.95,
            "load5": 2.06,
            "load15": 1.2,
            "mem_total_mb": 124610,
            "mem_used_mb": 102058,
            "mem_free_mb": 10308,
            "mem_available_mb": 22552,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.83
          },
          {
            "timestamp": 1778139176.8436742,
            "unit": "spark",
            "load1": 2.95,
            "load5": 2.06,
            "load15": 1.2,
            "mem_total_mb": 124610,
            "mem_used_mb": 102072,
            "mem_free_mb": 10292,
            "mem_available_mb": 22537,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 84.0,
            "gpu_power_w": 45.56
          },
          {
            "timestamp": 1778139180.2763572,
            "unit": "spark",
            "load1": 3.04,
            "load5": 2.1,
            "load15": 1.22,
            "mem_total_mb": 124610,
            "mem_used_mb": 102059,
            "mem_free_mb": 10304,
            "mem_available_mb": 22551,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.21
          },
          {
            "timestamp": 1778139183.736062,
            "unit": "spark",
            "load1": 2.87,
            "load5": 2.08,
            "load15": 1.22,
            "mem_total_mb": 124610,
            "mem_used_mb": 102021,
            "mem_free_mb": 10342,
            "mem_available_mb": 22589,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.31
          },
          {
            "timestamp": 1778139187.1709812,
            "unit": "spark",
            "load1": 2.87,
            "load5": 2.08,
            "load15": 1.22,
            "mem_total_mb": 124610,
            "mem_used_mb": 102004,
            "mem_free_mb": 10359,
            "mem_available_mb": 22606,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.28
          },
          {
            "timestamp": 1778139190.615123,
            "unit": "spark",
            "load1": 2.72,
            "load5": 2.06,
            "load15": 1.22,
            "mem_total_mb": 124610,
            "mem_used_mb": 102156,
            "mem_free_mb": 9983,
            "mem_available_mb": 22454,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.08
          },
          {
            "timestamp": 1778139194.081024,
            "unit": "spark",
            "load1": 2.66,
            "load5": 2.06,
            "load15": 1.22,
            "mem_total_mb": 124610,
            "mem_used_mb": 102221,
            "mem_free_mb": 9739,
            "mem_available_mb": 22389,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.29
          },
          {
            "timestamp": 1778139197.518642,
            "unit": "spark",
            "load1": 2.66,
            "load5": 2.06,
            "load15": 1.22,
            "mem_total_mb": 124610,
            "mem_used_mb": 101987,
            "mem_free_mb": 9973,
            "mem_available_mb": 22623,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.3
          },
          {
            "timestamp": 1778139200.9764879,
            "unit": "spark",
            "load1": 2.61,
            "load5": 2.06,
            "load15": 1.23,
            "mem_total_mb": 124610,
            "mem_used_mb": 101981,
            "mem_free_mb": 9979,
            "mem_available_mb": 22629,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.33
          },
          {
            "timestamp": 1778139201.903109,
            "unit": "spark",
            "load1": 2.61,
            "load5": 2.06,
            "load15": 1.23,
            "mem_total_mb": 124610,
            "mem_used_mb": 101973,
            "mem_free_mb": 9987,
            "mem_available_mb": 22637,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 31.3
          }
        ]
      },
      {
        "summary": {
          "concurrency": 6,
          "total_requests": 24,
          "request_count": 24,
          "success_count": 24,
          "error_count": 0,
          "error_rate": 0.0,
          "wall_s": 168.23583712504478,
          "throughput_rps": 0.14265688220852435,
          "latency_avg_s": 29.826767461750325,
          "latency_p50_s": 15.215512667025905,
          "latency_p95_s": 95.34877841250852,
          "latency_max_s": 100.9161638750229,
          "completion_tps": 11.691920304340307,
          "sample_errors": [],
          "peak_gpu_temp_c": 66.0,
          "peak_gpu_util_pct": 96.0,
          "peak_gpu_power_w": 47.52,
          "peak_load1": 3.07,
          "peak_mem_used_mb": 107876
        },
        "requests": [
          {
            "request_id": "spark-6-4",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139204.4369438,
            "latency_s": 8.22675283299759,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-5",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139204.437065,
            "latency_s": 9.320884833985474,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the model architecture to reduce latency and increase throughput.\n2. Use efficient data structures and algo",
            "prompt_tokens": 75,
            "completion_tokens": 54,
            "total_tokens": 129
          },
          {
            "request_id": "spark-6-1",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139204.4364328,
            "latency_s": 16.457281958020758,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size: Use smaller models that can be inferred quickly to reduce latency.\n\n2. Implement load balancing",
            "prompt_tokens": 75,
            "completion_tokens": 83,
            "total_tokens": 158
          },
          {
            "request_id": "spark-6-8",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139213.758052,
            "latency_s": 9.067571708001196,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-2",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139204.436651,
            "latency_s": 23.682719000033103,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference algorithms to minimize latency and improve throughput.\n* Para",
            "prompt_tokens": 76,
            "completion_tokens": 70,
            "total_tokens": 146
          },
          {
            "request_id": "spark-6-6",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139204.437187,
            "latency_s": 28.62696170801064,
            "finish_reason": "length",
            "content_preview": " - Use efficient algorithms: Implement parallel processing techniques to optimize inference speed, such as using GPU acc",
            "prompt_tokens": 76,
            "completion_tokens": 96,
            "total_tokens": 172
          },
          {
            "request_id": "spark-6-9",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139220.894102,
            "latency_s": 16.36492745799478,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size: Use smaller models that can be inferred quickly, reducing latency.\n2. Implement load balancing:",
            "prompt_tokens": 75,
            "completion_tokens": 77,
            "total_tokens": 152
          },
          {
            "request_id": "spark-6-10",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139222.825929,
            "latency_s": 15.331141041999217,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference models that minimize latency and improve throughput, such as ",
            "prompt_tokens": 76,
            "completion_tokens": 85,
            "total_tokens": 161
          },
          {
            "request_id": "spark-6-14",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139238.157349,
            "latency_s": 3.9907612079987302,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference models that minimize latency, such as using techniques like m",
            "prompt_tokens": 76,
            "completion_tokens": 83,
            "total_tokens": 159
          },
          {
            "request_id": "spark-6-12",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139233.064638,
            "latency_s": 17.33518950000871,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-3",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139204.4368,
            "latency_s": 46.22128195798723,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **1. Caching**:\n\t+ Cache frequent user queries/results\n\t+ Reduce repeated infe",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-13",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139237.259317,
            "latency_s": 15.099884292052593,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the AI model to reduce latency and increase throughput.\n2. Implement load balancing to distribute user requ",
            "prompt_tokens": 75,
            "completion_tokens": 52,
            "total_tokens": 127
          },
          {
            "request_id": "spark-6-17",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139250.658397,
            "latency_s": 4.502562249952462,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size to reduce latency\n2. Use efficient data structures and algorithms\n3. Implement load balancing an",
            "prompt_tokens": 75,
            "completion_tokens": 32,
            "total_tokens": 107
          },
          {
            "request_id": "spark-6-16",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139250.400121,
            "latency_s": 8.516498207987752,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-18",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139252.359622,
            "latency_s": 11.494084291975014,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference algorithms to minimize latency and improve throughput.\n* Opti",
            "prompt_tokens": 76,
            "completion_tokens": 73,
            "total_tokens": 149
          },
          {
            "request_id": "spark-6-20",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139258.91682,
            "latency_s": 8.477670499996748,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-21",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139263.8552,
            "latency_s": 7.899294457980432,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the model architecture to reduce latency.\n2. Use efficient data structures and algorithms for processing in",
            "prompt_tokens": 75,
            "completion_tokens": 47,
            "total_tokens": 122
          },
          {
            "request_id": "spark-6-22",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139267.3949282,
            "latency_s": 9.170121957955416,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference algorithms to minimize latency and improve throughput.\n* Para",
            "prompt_tokens": 76,
            "completion_tokens": 63,
            "total_tokens": 139
          },
          {
            "request_id": "spark-6-24",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139276.5654988,
            "latency_s": 12.081169125041924,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-7",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139212.6641269,
            "latency_s": 76.23139345803065,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-11",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139228.1199,
            "latency_s": 81.69047495897394,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-15",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139242.148472,
            "latency_s": 88.59743687498849,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-19",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139255.161367,
            "latency_s": 96.54019162501208,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-6-23",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139271.755114,
            "latency_s": 100.9161638750229,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          }
        ],
        "samples": [
          {
            "timestamp": 1778139204.226275,
            "unit": "spark",
            "load1": 2.4,
            "load5": 2.02,
            "load15": 1.22,
            "mem_total_mb": 124610,
            "mem_used_mb": 101946,
            "mem_free_mb": 10013,
            "mem_available_mb": 22663,
            "gpu_temp_c": 59.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 15.21
          },
          {
            "timestamp": 1778139207.587648,
            "unit": "spark",
            "load1": 2.4,
            "load5": 2.02,
            "load15": 1.22,
            "mem_total_mb": 124610,
            "mem_used_mb": 107077,
            "mem_free_mb": 5145,
            "mem_available_mb": 17533,
            "gpu_temp_c": 63.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.32
          },
          {
            "timestamp": 1778139211.1430988,
            "unit": "spark",
            "load1": 2.53,
            "load5": 2.06,
            "load15": 1.23,
            "mem_total_mb": 124610,
            "mem_used_mb": 107811,
            "mem_free_mb": 4406,
            "mem_available_mb": 16798,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.83
          },
          {
            "timestamp": 1778139214.644274,
            "unit": "spark",
            "load1": 2.49,
            "load5": 2.06,
            "load15": 1.24,
            "mem_total_mb": 124610,
            "mem_used_mb": 107796,
            "mem_free_mb": 4421,
            "mem_available_mb": 16813,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.97
          },
          {
            "timestamp": 1778139218.06948,
            "unit": "spark",
            "load1": 2.53,
            "load5": 2.07,
            "load15": 1.25,
            "mem_total_mb": 124610,
            "mem_used_mb": 107795,
            "mem_free_mb": 4422,
            "mem_available_mb": 16815,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.97
          },
          {
            "timestamp": 1778139221.626382,
            "unit": "spark",
            "load1": 2.53,
            "load5": 2.07,
            "load15": 1.25,
            "mem_total_mb": 124610,
            "mem_used_mb": 98314,
            "mem_free_mb": 13903,
            "mem_available_mb": 26296,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.06
          },
          {
            "timestamp": 1778139225.1201699,
            "unit": "spark",
            "load1": 2.49,
            "load5": 2.07,
            "load15": 1.25,
            "mem_total_mb": 124610,
            "mem_used_mb": 101318,
            "mem_free_mb": 10898,
            "mem_available_mb": 23292,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 95.0,
            "gpu_power_w": 43.96
          },
          {
            "timestamp": 1778139228.848437,
            "unit": "spark",
            "load1": 2.77,
            "load5": 2.14,
            "load15": 1.28,
            "mem_total_mb": 124610,
            "mem_used_mb": 98710,
            "mem_free_mb": 14092,
            "mem_available_mb": 25900,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.92
          },
          {
            "timestamp": 1778139232.371349,
            "unit": "spark",
            "load1": 2.77,
            "load5": 2.14,
            "load15": 1.28,
            "mem_total_mb": 124610,
            "mem_used_mb": 99800,
            "mem_free_mb": 13001,
            "mem_available_mb": 24809,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.32
          },
          {
            "timestamp": 1778139235.872245,
            "unit": "spark",
            "load1": 2.87,
            "load5": 2.17,
            "load15": 1.29,
            "mem_total_mb": 124610,
            "mem_used_mb": 99832,
            "mem_free_mb": 12961,
            "mem_available_mb": 24777,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.32
          },
          {
            "timestamp": 1778139239.64039,
            "unit": "spark",
            "load1": 2.88,
            "load5": 2.18,
            "load15": 1.3,
            "mem_total_mb": 124610,
            "mem_used_mb": 92496,
            "mem_free_mb": 20584,
            "mem_available_mb": 32114,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.6
          },
          {
            "timestamp": 1778139243.521017,
            "unit": "spark",
            "load1": 2.89,
            "load5": 2.19,
            "load15": 1.31,
            "mem_total_mb": 124610,
            "mem_used_mb": 99145,
            "mem_free_mb": 13098,
            "mem_available_mb": 25465,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.12
          },
          {
            "timestamp": 1778139247.320329,
            "unit": "spark",
            "load1": 2.89,
            "load5": 2.19,
            "load15": 1.31,
            "mem_total_mb": 124610,
            "mem_used_mb": 107855,
            "mem_free_mb": 4451,
            "mem_available_mb": 16755,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.47
          },
          {
            "timestamp": 1778139250.825122,
            "unit": "spark",
            "load1": 2.9,
            "load5": 2.21,
            "load15": 1.32,
            "mem_total_mb": 124610,
            "mem_used_mb": 107848,
            "mem_free_mb": 4458,
            "mem_available_mb": 16762,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.24
          },
          {
            "timestamp": 1778139254.279116,
            "unit": "spark",
            "load1": 2.91,
            "load5": 2.22,
            "load15": 1.33,
            "mem_total_mb": 124610,
            "mem_used_mb": 107856,
            "mem_free_mb": 4449,
            "mem_available_mb": 16754,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.2
          },
          {
            "timestamp": 1778139257.717925,
            "unit": "spark",
            "load1": 2.91,
            "load5": 2.22,
            "load15": 1.33,
            "mem_total_mb": 124610,
            "mem_used_mb": 100960,
            "mem_free_mb": 11345,
            "mem_available_mb": 23650,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.28
          },
          {
            "timestamp": 1778139261.167216,
            "unit": "spark",
            "load1": 3.07,
            "load5": 2.27,
            "load15": 1.35,
            "mem_total_mb": 124610,
            "mem_used_mb": 102153,
            "mem_free_mb": 10152,
            "mem_available_mb": 22457,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.55
          },
          {
            "timestamp": 1778139264.586098,
            "unit": "spark",
            "load1": 3.07,
            "load5": 2.28,
            "load15": 1.36,
            "mem_total_mb": 124610,
            "mem_used_mb": 98577,
            "mem_free_mb": 13727,
            "mem_available_mb": 26032,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.4
          },
          {
            "timestamp": 1778139268.114055,
            "unit": "spark",
            "load1": 2.98,
            "load5": 2.28,
            "load15": 1.36,
            "mem_total_mb": 124610,
            "mem_used_mb": 107876,
            "mem_free_mb": 4461,
            "mem_available_mb": 16733,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.19
          },
          {
            "timestamp": 1778139271.683063,
            "unit": "spark",
            "load1": 2.98,
            "load5": 2.28,
            "load15": 1.36,
            "mem_total_mb": 124610,
            "mem_used_mb": 98627,
            "mem_free_mb": 13740,
            "mem_available_mb": 25982,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.69
          },
          {
            "timestamp": 1778139275.372098,
            "unit": "spark",
            "load1": 3.06,
            "load5": 2.3,
            "load15": 1.38,
            "mem_total_mb": 124610,
            "mem_used_mb": 99839,
            "mem_free_mb": 12525,
            "mem_available_mb": 24770,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.41
          },
          {
            "timestamp": 1778139278.8391838,
            "unit": "spark",
            "load1": 3.06,
            "load5": 2.32,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 90882,
            "mem_free_mb": 21482,
            "mem_available_mb": 33728,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 47.52
          },
          {
            "timestamp": 1778139282.309406,
            "unit": "spark",
            "load1": 3.06,
            "load5": 2.32,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 102275,
            "mem_free_mb": 10335,
            "mem_available_mb": 22335,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 47.02
          },
          {
            "timestamp": 1778139285.766819,
            "unit": "spark",
            "load1": 3.05,
            "load5": 2.33,
            "load15": 1.4,
            "mem_total_mb": 124610,
            "mem_used_mb": 102195,
            "mem_free_mb": 10415,
            "mem_available_mb": 22415,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.19
          },
          {
            "timestamp": 1778139289.17109,
            "unit": "spark",
            "load1": 2.97,
            "load5": 2.32,
            "load15": 1.4,
            "mem_total_mb": 124610,
            "mem_used_mb": 102189,
            "mem_free_mb": 10419,
            "mem_available_mb": 22421,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 94.0,
            "gpu_power_w": 46.49
          },
          {
            "timestamp": 1778139292.64627,
            "unit": "spark",
            "load1": 2.97,
            "load5": 2.32,
            "load15": 1.4,
            "mem_total_mb": 124610,
            "mem_used_mb": 102187,
            "mem_free_mb": 10420,
            "mem_available_mb": 22422,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.71
          },
          {
            "timestamp": 1778139296.0592482,
            "unit": "spark",
            "load1": 2.89,
            "load5": 2.32,
            "load15": 1.4,
            "mem_total_mb": 124610,
            "mem_used_mb": 102186,
            "mem_free_mb": 10421,
            "mem_available_mb": 22424,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.67
          },
          {
            "timestamp": 1778139299.534226,
            "unit": "spark",
            "load1": 2.74,
            "load5": 2.29,
            "load15": 1.4,
            "mem_total_mb": 124610,
            "mem_used_mb": 102178,
            "mem_free_mb": 10419,
            "mem_available_mb": 22432,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.44
          },
          {
            "timestamp": 1778139303.4365911,
            "unit": "spark",
            "load1": 2.6,
            "load5": 2.27,
            "load15": 1.4,
            "mem_total_mb": 124610,
            "mem_used_mb": 102155,
            "mem_free_mb": 10441,
            "mem_available_mb": 22454,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.57
          },
          {
            "timestamp": 1778139306.861297,
            "unit": "spark",
            "load1": 2.6,
            "load5": 2.27,
            "load15": 1.4,
            "mem_total_mb": 124610,
            "mem_used_mb": 102144,
            "mem_free_mb": 10452,
            "mem_available_mb": 22466,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.51
          },
          {
            "timestamp": 1778139310.2653599,
            "unit": "spark",
            "load1": 2.47,
            "load5": 2.25,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 102134,
            "mem_free_mb": 10462,
            "mem_available_mb": 22476,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 85.0,
            "gpu_power_w": 46.28
          },
          {
            "timestamp": 1778139313.6785111,
            "unit": "spark",
            "load1": 2.35,
            "load5": 2.23,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 102121,
            "mem_free_mb": 10475,
            "mem_available_mb": 22489,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.55
          },
          {
            "timestamp": 1778139317.098805,
            "unit": "spark",
            "load1": 2.35,
            "load5": 2.23,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 102140,
            "mem_free_mb": 10379,
            "mem_available_mb": 22470,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.45
          },
          {
            "timestamp": 1778139320.521396,
            "unit": "spark",
            "load1": 2.24,
            "load5": 2.21,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 102112,
            "mem_free_mb": 10406,
            "mem_available_mb": 22497,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.52
          },
          {
            "timestamp": 1778139323.970164,
            "unit": "spark",
            "load1": 2.14,
            "load5": 2.19,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 102106,
            "mem_free_mb": 10413,
            "mem_available_mb": 22504,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.59
          },
          {
            "timestamp": 1778139327.410656,
            "unit": "spark",
            "load1": 2.14,
            "load5": 2.19,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 102092,
            "mem_free_mb": 10426,
            "mem_available_mb": 22518,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.46
          },
          {
            "timestamp": 1778139330.876788,
            "unit": "spark",
            "load1": 2.21,
            "load5": 2.2,
            "load15": 1.4,
            "mem_total_mb": 124610,
            "mem_used_mb": 102097,
            "mem_free_mb": 10421,
            "mem_available_mb": 22512,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.37
          },
          {
            "timestamp": 1778139334.3215559,
            "unit": "spark",
            "load1": 2.12,
            "load5": 2.18,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 102092,
            "mem_free_mb": 10425,
            "mem_available_mb": 22517,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.48
          },
          {
            "timestamp": 1778139337.802613,
            "unit": "spark",
            "load1": 2.12,
            "load5": 2.18,
            "load15": 1.39,
            "mem_total_mb": 124610,
            "mem_used_mb": 102089,
            "mem_free_mb": 10429,
            "mem_available_mb": 22521,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.54
          },
          {
            "timestamp": 1778139341.254456,
            "unit": "spark",
            "load1": 2.27,
            "load5": 2.21,
            "load15": 1.41,
            "mem_total_mb": 124610,
            "mem_used_mb": 102098,
            "mem_free_mb": 10420,
            "mem_available_mb": 22512,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.49
          },
          {
            "timestamp": 1778139344.725881,
            "unit": "spark",
            "load1": 2.33,
            "load5": 2.23,
            "load15": 1.42,
            "mem_total_mb": 124610,
            "mem_used_mb": 102144,
            "mem_free_mb": 10373,
            "mem_available_mb": 22465,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.54
          },
          {
            "timestamp": 1778139348.22107,
            "unit": "spark",
            "load1": 2.22,
            "load5": 2.21,
            "load15": 1.41,
            "mem_total_mb": 124610,
            "mem_used_mb": 102129,
            "mem_free_mb": 10388,
            "mem_available_mb": 22481,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.55
          },
          {
            "timestamp": 1778139351.68828,
            "unit": "spark",
            "load1": 2.22,
            "load5": 2.21,
            "load15": 1.41,
            "mem_total_mb": 124610,
            "mem_used_mb": 102118,
            "mem_free_mb": 10399,
            "mem_available_mb": 22492,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.41
          },
          {
            "timestamp": 1778139355.151076,
            "unit": "spark",
            "load1": 2.12,
            "load5": 2.19,
            "load15": 1.41,
            "mem_total_mb": 124610,
            "mem_used_mb": 102106,
            "mem_free_mb": 10411,
            "mem_available_mb": 22504,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.55
          },
          {
            "timestamp": 1778139358.6212149,
            "unit": "spark",
            "load1": 2.03,
            "load5": 2.17,
            "load15": 1.41,
            "mem_total_mb": 124610,
            "mem_used_mb": 102106,
            "mem_free_mb": 10412,
            "mem_available_mb": 22504,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.53
          },
          {
            "timestamp": 1778139362.05972,
            "unit": "spark",
            "load1": 2.03,
            "load5": 2.17,
            "load15": 1.41,
            "mem_total_mb": 124610,
            "mem_used_mb": 102112,
            "mem_free_mb": 10405,
            "mem_available_mb": 22498,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.44
          },
          {
            "timestamp": 1778139365.823484,
            "unit": "spark",
            "load1": 2.03,
            "load5": 2.16,
            "load15": 1.41,
            "mem_total_mb": 124610,
            "mem_used_mb": 102275,
            "mem_free_mb": 10002,
            "mem_available_mb": 22334,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.42
          },
          {
            "timestamp": 1778139369.235022,
            "unit": "spark",
            "load1": 2.03,
            "load5": 2.16,
            "load15": 1.42,
            "mem_total_mb": 124610,
            "mem_used_mb": 102093,
            "mem_free_mb": 10016,
            "mem_available_mb": 22517,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.4
          },
          {
            "timestamp": 1778139372.643181,
            "unit": "spark",
            "load1": 2.03,
            "load5": 2.16,
            "load15": 1.42,
            "mem_total_mb": 124610,
            "mem_used_mb": 102110,
            "mem_free_mb": 9999,
            "mem_available_mb": 22499,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 38.0
          },
          {
            "timestamp": 1778139372.981717,
            "unit": "spark",
            "load1": 2.03,
            "load5": 2.16,
            "load15": 1.42,
            "mem_total_mb": 124610,
            "mem_used_mb": 102133,
            "mem_free_mb": 9976,
            "mem_available_mb": 22476,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 27.76
          }
        ]
      },
      {
        "summary": {
          "concurrency": 8,
          "total_requests": 32,
          "request_count": 32,
          "success_count": 32,
          "error_count": 0,
          "error_rate": 0.0,
          "wall_s": 223.116339124972,
          "throughput_rps": 0.14342293408675977,
          "latency_avg_s": 38.14707736721721,
          "latency_p50_s": 17.78267337498255,
          "latency_p95_s": 127.34639058297907,
          "latency_max_s": 134.57571875001304,
          "completion_tps": 12.1192379303312,
          "sample_errors": [],
          "peak_gpu_temp_c": 67.0,
          "peak_gpu_util_pct": 96.0,
          "peak_gpu_power_w": 47.16,
          "peak_load1": 2.89,
          "peak_mem_used_mb": 108065
        },
        "requests": [
          {
            "request_id": "spark-8-4",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139375.525961,
            "latency_s": 8.19357524998486,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need operational ",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-5",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139375.526082,
            "latency_s": 8.456236708036158,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size to reduce latency\n2. Use efficient data structures for input processing\n3. Implement load balanc",
            "prompt_tokens": 75,
            "completion_tokens": 38,
            "total_tokens": 113
          },
          {
            "request_id": "spark-8-1",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139375.525429,
            "latency_s": 15.933707499993034,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size: Use smaller models that can be loaded and inferred quickly, reducing latency.\n2. Implement load",
            "prompt_tokens": 75,
            "completion_tokens": 85,
            "total_tokens": 160
          },
          {
            "request_id": "spark-8-8",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139375.5264509,
            "latency_s": 17.662233916984405,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-9",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139383.719904,
            "latency_s": 11.74643937498331,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the model architecture to reduce latency.\n2. Use efficient data structures and algorithms for processing in",
            "prompt_tokens": 75,
            "completion_tokens": 46,
            "total_tokens": 121
          },
          {
            "request_id": "spark-8-12",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139393.1889439,
            "latency_s": 8.204366165969986,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-2",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139375.525641,
            "latency_s": 28.9993622500333,
            "finish_reason": "length",
            "content_preview": " - Use efficient algorithms: Implement optimized inference models that can handle high volumes of requests efficiently, ",
            "prompt_tokens": 76,
            "completion_tokens": 96,
            "total_tokens": 172
          },
          {
            "request_id": "spark-8-6",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139375.526218,
            "latency_s": 33.181354959029704,
            "finish_reason": "length",
            "content_preview": " * Use efficient algorithms: Implement optimized machine learning models that can handle high volumes of requests quickl",
            "prompt_tokens": 76,
            "completion_tokens": 96,
            "total_tokens": 172
          },
          {
            "request_id": "spark-8-10",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139383.982616,
            "latency_s": 30.301839750027284,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference models that can handle multiple requests concurrently, ensuri",
            "prompt_tokens": 76,
            "completion_tokens": 87,
            "total_tokens": 163
          },
          {
            "request_id": "spark-8-16",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139408.7078888,
            "latency_s": 9.121572416974232,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-14",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139401.393738,
            "latency_s": 17.390362624952104,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized machine learning models to reduce inference time, such as using model p",
            "prompt_tokens": 76,
            "completion_tokens": 75,
            "total_tokens": 151
          },
          {
            "request_id": "spark-8-18",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139417.829939,
            "latency_s": 5.12307262502145,
            "finish_reason": "length",
            "content_preview": " * Use efficient algorithms: Implement optimized machine learning models that can handle high volumes of requests effici",
            "prompt_tokens": 76,
            "completion_tokens": 96,
            "total_tokens": 172
          },
          {
            "request_id": "spark-8-7",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139375.5263438,
            "latency_s": 48.00194529205328,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-20",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139422.953421,
            "latency_s": 8.542301458015572,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-13",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139395.466708,
            "latency_s": 36.95754087500973,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the model architecture to reduce latency.\n2. Use efficient data structures and algorithms for processing in",
            "prompt_tokens": 75,
            "completion_tokens": 46,
            "total_tokens": 121
          },
          {
            "request_id": "spark-8-17",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139414.2848792,
            "latency_s": 20.66066504199989,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size to reduce latency\n2. Use efficient data structures for input and output\n3. Implement load balanc",
            "prompt_tokens": 75,
            "completion_tokens": 39,
            "total_tokens": 114
          },
          {
            "request_id": "spark-8-21",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139423.528781,
            "latency_s": 17.903112832980696,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize model size: Use smaller models or prune larger ones to reduce memory usage and latency.\n\n2. Implement load ",
            "prompt_tokens": 75,
            "completion_tokens": 85,
            "total_tokens": 160
          },
          {
            "request_id": "spark-8-22",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139431.49599,
            "latency_s": 10.81818491697777,
            "finish_reason": "length",
            "content_preview": " * Use efficient algorithms: Implement optimized inference models that can handle high volumes of requests quickly, such",
            "prompt_tokens": 76,
            "completion_tokens": 96,
            "total_tokens": 172
          },
          {
            "request_id": "spark-8-26",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139442.3144681,
            "latency_s": 4.078389416972641,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference models that minimize latency, such as using techniques like m",
            "prompt_tokens": 76,
            "completion_tokens": 85,
            "total_tokens": 161
          },
          {
            "request_id": "spark-8-24",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139434.9460502,
            "latency_s": 19.581673624983523,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-25",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139441.432354,
            "latency_s": 18.067364874994382,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the model: Use efficient models that are optimized for inference to reduce latency.\n\n2. Distribute workload",
            "prompt_tokens": 75,
            "completion_tokens": 88,
            "total_tokens": 163
          },
          {
            "request_id": "spark-8-29",
            "model": "llava:latest",
            "ok": true,
            "started_at": 1778139459.500064,
            "latency_s": 4.127396833966486,
            "finish_reason": "stop",
            "content_preview": " 1. Optimize the model architecture to reduce latency and memory usage.\n2. Use efficient data structures and algorithms ",
            "prompt_tokens": 75,
            "completion_tokens": 47,
            "total_tokens": 122
          },
          {
            "request_id": "spark-8-28",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139454.528102,
            "latency_s": 9.535531750007067,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-3",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139375.5257988,
            "latency_s": 92.78141204098938,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-30",
            "model": "nemotron-mini:latest",
            "ok": true,
            "started_at": 1778139463.627889,
            "latency_s": 8.107702250010334,
            "finish_reason": "stop",
            "content_preview": " * Use efficient algorithms: Implement optimized inference algorithms to reduce latency and improve throughput.\n* Optimi",
            "prompt_tokens": 76,
            "completion_tokens": 63,
            "total_tokens": 139
          },
          {
            "request_id": "spark-8-32",
            "model": "qwen3:8b",
            "ok": true,
            "started_at": 1778139468.3080058,
            "latency_s": 7.866115290962625,
            "finish_reason": "length",
            "content_preview": "Okay, the user is testing their AI inference cluster's ability to handle concurrent chat traffic. They need three operat",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-11",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139391.459568,
            "latency_s": 102.30593383405358,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across multiple AI nodes\n\t+ E",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-15",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139404.525561,
            "latency_s": 110.18720004201168,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-19",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139418.784279,
            "latency_s": 116.90575291699497,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-23",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139432.424665,
            "latency_s": 124.21606008295203,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-27",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139446.393257,
            "latency_s": 131.17235008301213,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balancing**:\n\t+ Distribute incoming chats across all available AI nodes",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          },
          {
            "request_id": "spark-8-31",
            "model": "nemotron:latest",
            "ok": true,
            "started_at": 1778139464.0638962,
            "latency_s": 134.57571875001304,
            "finish_reason": "length",
            "content_preview": "Here are three short operational tips:\n\n* **Load Balance**: \n\t+ Distribute incoming chats across multiple AI nodes\n\t+ Us",
            "prompt_tokens": 79,
            "completion_tokens": 96,
            "total_tokens": 175
          }
        ],
        "samples": [
          {
            "timestamp": 1778139375.315008,
            "unit": "spark",
            "load1": 2.02,
            "load5": 2.16,
            "load15": 1.42,
            "mem_total_mb": 124610,
            "mem_used_mb": 102203,
            "mem_free_mb": 9906,
            "mem_available_mb": 22406,
            "gpu_temp_c": 59.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 15.32
          },
          {
            "timestamp": 1778139378.681098,
            "unit": "spark",
            "load1": 2.1,
            "load5": 2.17,
            "load15": 1.43,
            "mem_total_mb": 124610,
            "mem_used_mb": 107791,
            "mem_free_mb": 4580,
            "mem_available_mb": 16819,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.57
          },
          {
            "timestamp": 1778139382.371125,
            "unit": "spark",
            "load1": 2.1,
            "load5": 2.17,
            "load15": 1.43,
            "mem_total_mb": 124610,
            "mem_used_mb": 107931,
            "mem_free_mb": 4438,
            "mem_available_mb": 16678,
            "gpu_temp_c": 64.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.94
          },
          {
            "timestamp": 1778139385.811508,
            "unit": "spark",
            "load1": 2.17,
            "load5": 2.18,
            "load15": 1.44,
            "mem_total_mb": 124610,
            "mem_used_mb": 107894,
            "mem_free_mb": 4475,
            "mem_available_mb": 16715,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.08
          },
          {
            "timestamp": 1778139389.2471948,
            "unit": "spark",
            "load1": 2.24,
            "load5": 2.2,
            "load15": 1.45,
            "mem_total_mb": 124610,
            "mem_used_mb": 107897,
            "mem_free_mb": 4472,
            "mem_available_mb": 16712,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.16
          },
          {
            "timestamp": 1778139392.748733,
            "unit": "spark",
            "load1": 2.24,
            "load5": 2.2,
            "load15": 1.45,
            "mem_total_mb": 124610,
            "mem_used_mb": 107885,
            "mem_free_mb": 4484,
            "mem_available_mb": 16725,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.17
          },
          {
            "timestamp": 1778139396.231446,
            "unit": "spark",
            "load1": 2.38,
            "load5": 2.23,
            "load15": 1.46,
            "mem_total_mb": 124610,
            "mem_used_mb": 98487,
            "mem_free_mb": 13882,
            "mem_available_mb": 26123,
            "gpu_temp_c": 65.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.44
          },
          {
            "timestamp": 1778139399.7399218,
            "unit": "spark",
            "load1": 2.51,
            "load5": 2.26,
            "load15": 1.47,
            "mem_total_mb": 124610,
            "mem_used_mb": 102216,
            "mem_free_mb": 10153,
            "mem_available_mb": 22394,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 95.0,
            "gpu_power_w": 46.22
          },
          {
            "timestamp": 1778139403.2879071,
            "unit": "spark",
            "load1": 2.47,
            "load5": 2.25,
            "load15": 1.48,
            "mem_total_mb": 124610,
            "mem_used_mb": 102204,
            "mem_free_mb": 10164,
            "mem_available_mb": 22405,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.33
          },
          {
            "timestamp": 1778139406.829151,
            "unit": "spark",
            "load1": 2.47,
            "load5": 2.25,
            "load15": 1.48,
            "mem_total_mb": 124610,
            "mem_used_mb": 102216,
            "mem_free_mb": 10153,
            "mem_available_mb": 22394,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.34
          },
          {
            "timestamp": 1778139410.28667,
            "unit": "spark",
            "load1": 2.43,
            "load5": 2.25,
            "load15": 1.48,
            "mem_total_mb": 124610,
            "mem_used_mb": 102213,
            "mem_free_mb": 10156,
            "mem_available_mb": 22397,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.59
          },
          {
            "timestamp": 1778139413.6703382,
            "unit": "spark",
            "load1": 2.48,
            "load5": 2.26,
            "load15": 1.49,
            "mem_total_mb": 124610,
            "mem_used_mb": 102222,
            "mem_free_mb": 10147,
            "mem_available_mb": 22388,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.54
          },
          {
            "timestamp": 1778139417.101304,
            "unit": "spark",
            "load1": 2.48,
            "load5": 2.26,
            "load15": 1.49,
            "mem_total_mb": 124610,
            "mem_used_mb": 102215,
            "mem_free_mb": 10153,
            "mem_available_mb": 22394,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.72
          },
          {
            "timestamp": 1778139420.5223758,
            "unit": "spark",
            "load1": 2.44,
            "load5": 2.26,
            "load15": 1.49,
            "mem_total_mb": 124610,
            "mem_used_mb": 102215,
            "mem_free_mb": 10153,
            "mem_available_mb": 22395,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.47
          },
          {
            "timestamp": 1778139423.992705,
            "unit": "spark",
            "load1": 2.4,
            "load5": 2.25,
            "load15": 1.49,
            "mem_total_mb": 124610,
            "mem_used_mb": 102203,
            "mem_free_mb": 10161,
            "mem_available_mb": 22406,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.57
          },
          {
            "timestamp": 1778139427.91623,
            "unit": "spark",
            "load1": 2.61,
            "load5": 2.3,
            "load15": 1.51,
            "mem_total_mb": 124610,
            "mem_used_mb": 107965,
            "mem_free_mb": 4692,
            "mem_available_mb": 16645,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.69
          },
          {
            "timestamp": 1778139431.399693,
            "unit": "spark",
            "load1": 2.61,
            "load5": 2.3,
            "load15": 1.51,
            "mem_total_mb": 124610,
            "mem_used_mb": 107937,
            "mem_free_mb": 4720,
            "mem_available_mb": 16673,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.16
          },
          {
            "timestamp": 1778139434.816488,
            "unit": "spark",
            "load1": 2.64,
            "load5": 2.31,
            "load15": 1.52,
            "mem_total_mb": 124610,
            "mem_used_mb": 98805,
            "mem_free_mb": 14145,
            "mem_available_mb": 25805,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.71
          },
          {
            "timestamp": 1778139438.411059,
            "unit": "spark",
            "load1": 2.67,
            "load5": 2.32,
            "load15": 1.53,
            "mem_total_mb": 124610,
            "mem_used_mb": 99982,
            "mem_free_mb": 12967,
            "mem_available_mb": 24628,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.69
          },
          {
            "timestamp": 1778139441.826922,
            "unit": "spark",
            "load1": 2.67,
            "load5": 2.32,
            "load15": 1.53,
            "mem_total_mb": 124610,
            "mem_used_mb": 90476,
            "mem_free_mb": 22473,
            "mem_available_mb": 34134,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 44.77
          },
          {
            "timestamp": 1778139445.243146,
            "unit": "spark",
            "load1": 2.7,
            "load5": 2.33,
            "load15": 1.54,
            "mem_total_mb": 124610,
            "mem_used_mb": 102036,
            "mem_free_mb": 11195,
            "mem_available_mb": 22574,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.03
          },
          {
            "timestamp": 1778139448.756814,
            "unit": "spark",
            "load1": 2.72,
            "load5": 2.35,
            "load15": 1.55,
            "mem_total_mb": 124610,
            "mem_used_mb": 102884,
            "mem_free_mb": 9557,
            "mem_available_mb": 21726,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.25
          },
          {
            "timestamp": 1778139452.154963,
            "unit": "spark",
            "load1": 2.72,
            "load5": 2.35,
            "load15": 1.55,
            "mem_total_mb": 124610,
            "mem_used_mb": 108059,
            "mem_free_mb": 4675,
            "mem_available_mb": 16550,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.53
          },
          {
            "timestamp": 1778139455.6066718,
            "unit": "spark",
            "load1": 2.75,
            "load5": 2.36,
            "load15": 1.55,
            "mem_total_mb": 124610,
            "mem_used_mb": 108062,
            "mem_free_mb": 4671,
            "mem_available_mb": 16548,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.55
          },
          {
            "timestamp": 1778139459.086387,
            "unit": "spark",
            "load1": 2.77,
            "load5": 2.37,
            "load15": 1.56,
            "mem_total_mb": 124610,
            "mem_used_mb": 108059,
            "mem_free_mb": 4674,
            "mem_available_mb": 16551,
            "gpu_temp_c": 67.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.6
          },
          {
            "timestamp": 1778139462.4986901,
            "unit": "spark",
            "load1": 2.77,
            "load5": 2.37,
            "load15": 1.56,
            "mem_total_mb": 124610,
            "mem_used_mb": 108065,
            "mem_free_mb": 4668,
            "mem_available_mb": 16545,
            "gpu_temp_c": 67.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.62
          },
          {
            "timestamp": 1778139465.94861,
            "unit": "spark",
            "load1": 2.87,
            "load5": 2.4,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 99014,
            "mem_free_mb": 13719,
            "mem_available_mb": 25596,
            "gpu_temp_c": 67.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 47.16
          },
          {
            "timestamp": 1778139469.5824041,
            "unit": "spark",
            "load1": 2.8,
            "load5": 2.39,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 102338,
            "mem_free_mb": 10394,
            "mem_available_mb": 22271,
            "gpu_temp_c": 67.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.95
          },
          {
            "timestamp": 1778139473.006334,
            "unit": "spark",
            "load1": 2.89,
            "load5": 2.42,
            "load15": 1.59,
            "mem_total_mb": 124610,
            "mem_used_mb": 102337,
            "mem_free_mb": 10395,
            "mem_available_mb": 22272,
            "gpu_temp_c": 67.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.39
          },
          {
            "timestamp": 1778139476.4388359,
            "unit": "spark",
            "load1": 2.89,
            "load5": 2.42,
            "load15": 1.59,
            "mem_total_mb": 124610,
            "mem_used_mb": 102322,
            "mem_free_mb": 10410,
            "mem_available_mb": 22287,
            "gpu_temp_c": 67.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.71
          },
          {
            "timestamp": 1778139479.841443,
            "unit": "spark",
            "load1": 2.74,
            "load5": 2.39,
            "load15": 1.59,
            "mem_total_mb": 124610,
            "mem_used_mb": 102316,
            "mem_free_mb": 10416,
            "mem_available_mb": 22294,
            "gpu_temp_c": 67.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.13
          },
          {
            "timestamp": 1778139483.2821481,
            "unit": "spark",
            "load1": 2.6,
            "load5": 2.37,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 102309,
            "mem_free_mb": 10423,
            "mem_available_mb": 22301,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.47
          },
          {
            "timestamp": 1778139486.7664099,
            "unit": "spark",
            "load1": 2.6,
            "load5": 2.37,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 102307,
            "mem_free_mb": 10413,
            "mem_available_mb": 22302,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.31
          },
          {
            "timestamp": 1778139490.5785,
            "unit": "spark",
            "load1": 2.47,
            "load5": 2.35,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 102308,
            "mem_free_mb": 10411,
            "mem_available_mb": 22302,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.67
          },
          {
            "timestamp": 1778139494.0072808,
            "unit": "spark",
            "load1": 2.35,
            "load5": 2.32,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 102309,
            "mem_free_mb": 10410,
            "mem_available_mb": 22301,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.74
          },
          {
            "timestamp": 1778139497.4562418,
            "unit": "spark",
            "load1": 2.35,
            "load5": 2.32,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 102312,
            "mem_free_mb": 10407,
            "mem_available_mb": 22298,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.72
          },
          {
            "timestamp": 1778139500.8792078,
            "unit": "spark",
            "load1": 2.33,
            "load5": 2.32,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 102322,
            "mem_free_mb": 10397,
            "mem_available_mb": 22288,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.65
          },
          {
            "timestamp": 1778139504.3038359,
            "unit": "spark",
            "load1": 2.22,
            "load5": 2.3,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 102317,
            "mem_free_mb": 10402,
            "mem_available_mb": 22293,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.55
          },
          {
            "timestamp": 1778139507.854649,
            "unit": "spark",
            "load1": 2.22,
            "load5": 2.3,
            "load15": 1.58,
            "mem_total_mb": 124610,
            "mem_used_mb": 102308,
            "mem_free_mb": 10410,
            "mem_available_mb": 22302,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.61
          },
          {
            "timestamp": 1778139511.307949,
            "unit": "spark",
            "load1": 2.6,
            "load5": 2.37,
            "load15": 1.61,
            "mem_total_mb": 124610,
            "mem_used_mb": 102344,
            "mem_free_mb": 10374,
            "mem_available_mb": 22266,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.61
          },
          {
            "timestamp": 1778139514.698165,
            "unit": "spark",
            "load1": 2.47,
            "load5": 2.35,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102336,
            "mem_free_mb": 10383,
            "mem_available_mb": 22274,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.27
          },
          {
            "timestamp": 1778139518.158599,
            "unit": "spark",
            "load1": 2.36,
            "load5": 2.33,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102324,
            "mem_free_mb": 10394,
            "mem_available_mb": 22285,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.62
          },
          {
            "timestamp": 1778139521.6901262,
            "unit": "spark",
            "load1": 2.36,
            "load5": 2.33,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102371,
            "mem_free_mb": 10296,
            "mem_available_mb": 22238,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.02
          },
          {
            "timestamp": 1778139525.157374,
            "unit": "spark",
            "load1": 2.33,
            "load5": 2.32,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102309,
            "mem_free_mb": 10062,
            "mem_available_mb": 22300,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.44
          },
          {
            "timestamp": 1778139528.6358888,
            "unit": "spark",
            "load1": 2.22,
            "load5": 2.3,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102303,
            "mem_free_mb": 10068,
            "mem_available_mb": 22306,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.6
          },
          {
            "timestamp": 1778139532.104645,
            "unit": "spark",
            "load1": 2.22,
            "load5": 2.3,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102315,
            "mem_free_mb": 10057,
            "mem_available_mb": 22295,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.68
          },
          {
            "timestamp": 1778139535.565656,
            "unit": "spark",
            "load1": 2.12,
            "load5": 2.28,
            "load15": 1.59,
            "mem_total_mb": 124610,
            "mem_used_mb": 102309,
            "mem_free_mb": 10062,
            "mem_available_mb": 22301,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.55
          },
          {
            "timestamp": 1778139539.1092281,
            "unit": "spark",
            "load1": 2.03,
            "load5": 2.26,
            "load15": 1.59,
            "mem_total_mb": 124610,
            "mem_used_mb": 102306,
            "mem_free_mb": 10065,
            "mem_available_mb": 22304,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.56
          },
          {
            "timestamp": 1778139542.654599,
            "unit": "spark",
            "load1": 2.03,
            "load5": 2.26,
            "load15": 1.59,
            "mem_total_mb": 124610,
            "mem_used_mb": 102322,
            "mem_free_mb": 10049,
            "mem_available_mb": 22288,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.61
          },
          {
            "timestamp": 1778139546.133784,
            "unit": "spark",
            "load1": 2.11,
            "load5": 2.27,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102324,
            "mem_free_mb": 10047,
            "mem_available_mb": 22285,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.64
          },
          {
            "timestamp": 1778139549.565763,
            "unit": "spark",
            "load1": 2.02,
            "load5": 2.25,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102307,
            "mem_free_mb": 10063,
            "mem_available_mb": 22302,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.54
          },
          {
            "timestamp": 1778139553.3022048,
            "unit": "spark",
            "load1": 2.1,
            "load5": 2.26,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102309,
            "mem_free_mb": 10062,
            "mem_available_mb": 22301,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.58
          },
          {
            "timestamp": 1778139556.811543,
            "unit": "spark",
            "load1": 2.1,
            "load5": 2.26,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102315,
            "mem_free_mb": 10056,
            "mem_available_mb": 22295,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.93
          },
          {
            "timestamp": 1778139560.263294,
            "unit": "spark",
            "load1": 2.01,
            "load5": 2.24,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102310,
            "mem_free_mb": 10061,
            "mem_available_mb": 22300,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.57
          },
          {
            "timestamp": 1778139563.721102,
            "unit": "spark",
            "load1": 1.93,
            "load5": 2.22,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102308,
            "mem_free_mb": 10063,
            "mem_available_mb": 22302,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.64
          },
          {
            "timestamp": 1778139567.303204,
            "unit": "spark",
            "load1": 1.93,
            "load5": 2.22,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102305,
            "mem_free_mb": 10066,
            "mem_available_mb": 22305,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.62
          },
          {
            "timestamp": 1778139570.9036689,
            "unit": "spark",
            "load1": 1.94,
            "load5": 2.21,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102303,
            "mem_free_mb": 10068,
            "mem_available_mb": 22307,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.6
          },
          {
            "timestamp": 1778139574.387112,
            "unit": "spark",
            "load1": 1.86,
            "load5": 2.19,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102311,
            "mem_free_mb": 10060,
            "mem_available_mb": 22299,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.57
          },
          {
            "timestamp": 1778139577.886131,
            "unit": "spark",
            "load1": 1.86,
            "load5": 2.19,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102306,
            "mem_free_mb": 10065,
            "mem_available_mb": 22304,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.61
          },
          {
            "timestamp": 1778139581.357099,
            "unit": "spark",
            "load1": 1.95,
            "load5": 2.21,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102304,
            "mem_free_mb": 10067,
            "mem_available_mb": 22306,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.63
          },
          {
            "timestamp": 1778139584.8289878,
            "unit": "spark",
            "load1": 1.87,
            "load5": 2.19,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102306,
            "mem_free_mb": 10065,
            "mem_available_mb": 22304,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.53
          },
          {
            "timestamp": 1778139588.323694,
            "unit": "spark",
            "load1": 1.8,
            "load5": 2.17,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102299,
            "mem_free_mb": 10071,
            "mem_available_mb": 22311,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.61
          },
          {
            "timestamp": 1778139591.804739,
            "unit": "spark",
            "load1": 1.8,
            "load5": 2.17,
            "load15": 1.6,
            "mem_total_mb": 124610,
            "mem_used_mb": 102300,
            "mem_free_mb": 10069,
            "mem_available_mb": 22310,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 46.24
          },
          {
            "timestamp": 1778139595.249085,
            "unit": "spark",
            "load1": 1.74,
            "load5": 2.15,
            "load15": 1.59,
            "mem_total_mb": 124610,
            "mem_used_mb": 102297,
            "mem_free_mb": 10071,
            "mem_available_mb": 22313,
            "gpu_temp_c": 66.0,
            "gpu_util_pct": 96.0,
            "gpu_power_w": 45.93
          },
          {
            "timestamp": 1778139598.7523232,
            "unit": "spark",
            "load1": 1.68,
            "load5": 2.13,
            "load15": 1.59,
            "mem_total_mb": 124610,
            "mem_used_mb": 102290,
            "mem_free_mb": 10078,
            "mem_available_mb": 22320,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 33.68
          },
          {
            "timestamp": 1778139599.075281,
            "unit": "spark",
            "load1": 1.68,
            "load5": 2.13,
            "load15": 1.59,
            "mem_total_mb": 124610,
            "mem_used_mb": 102287,
            "mem_free_mb": 10081,
            "mem_available_mb": 22323,
            "gpu_temp_c": 61.0,
            "gpu_util_pct": 0.0,
            "gpu_power_w": 23.69
          }
        ]
      }
    ],
    "baseline_p50_s": 3.3814984375203494
  }
}