{
  "title": "The Architect's Stillness Video Model Sweep",
  "published_date": "2026-05-09",
  "run_id": "architects-stillness-20260508T232959Z",
  "client_id": "codex-architects-stillness-20260508T232959Z",
  "remote_movie_directory": "/srv/neonflux/shared/chat-assets/movie/architects-stillness-20260508T232959Z",
  "local_artifact_directory": "/Users/nation/Documents/New project/artifacts/architects-stillness-20260508T232959Z",
  "source_asset_id": "108e3c7634cd4a77958d716d77395d16",
  "finished_at": "2026-05-09T02:09:00.242Z",
  "headline": {
    "models_tested": 10,
    "completed": 3,
    "failed": 7,
    "best_completed_model": "CogVideoX 5B",
    "best_completed_prompt_match_10": 3.5,
    "production_readiness": "The current text-to-video lanes can produce reviewable atmosphere clips, but this prompt exceeds the narrative and object-binding ability of the completed models. Several image-to-video adapters need runtime fixes before creative evaluation."
  },
  "source_keyframe_assessment": {
    "rating_10": 2.0,
    "verdict": "The generated source frame captured wet cinematic scale and lone figures, but missed the obsidian dome, exposed cybernetic brain, porcelain doll, golden neural ripple, and reflected-eye endpoint."
  },
  "vitals": {
    "sample_count": 292,
    "first_sample": "2026-05-08T23:30:11.518Z",
    "last_sample": "2026-05-09T02:08:58.363Z",
    "avg_gpu_util_pct": 85.7,
    "p95_gpu_util_pct": 96.0,
    "max_gpu_util_pct": 96.0,
    "avg_temp_c": 57.8,
    "max_temp_c": 83.0,
    "avg_power_w": 31.7,
    "max_power_w": 93.66,
    "active_samples": 260,
    "multi_generator_samples": 24
  },
  "models": [
    {
      "model": "Wan 2.1 T2V 1.3B",
      "model_id": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
      "task": "text-to-video",
      "status": "completed",
      "latency_seconds": 343.94,
      "resolution": "832x480",
      "frames": 49,
      "fps": 16,
      "steps": 20,
      "guidance_scale": 5,
      "visual_rating_10": 1.0,
      "prompt_match_10": 0.5,
      "quality_rating_10": 1.0,
      "verdict": "Rendered a valid MP4 but the image is effectively black, so it fails the visual and narrative requirements.",
      "matched_prompt_elements": [],
      "missed_prompt_elements": [
        "obsidian dome",
        "exposed brain",
        "child and doll",
        "soldier cutaway",
        "reflected eye"
      ],
      "failure_class": "",
      "error_excerpt": ""
    },
    {
      "model": "CogVideoX 2B",
      "model_id": "zai-org/CogVideoX-2b",
      "task": "text-to-video",
      "status": "completed",
      "latency_seconds": 209.27,
      "resolution": "720x480",
      "frames": 49,
      "fps": 8,
      "steps": 28,
      "guidance_scale": 6,
      "visual_rating_10": 3.0,
      "prompt_match_10": 2.5,
      "quality_rating_10": 4.0,
      "verdict": "Produces a coherent Gothic/industrial surface with cracked-glass energy, but it is almost static and omits the central story objects.",
      "matched_prompt_elements": [
        "dark Gothic architecture",
        "cold metal palette",
        "cracked/fogged surfaces"
      ],
      "missed_prompt_elements": [
        "brain network",
        "child/doll action",
        "remote soldier",
        "final reflected eye"
      ],
      "failure_class": "",
      "error_excerpt": ""
    },
    {
      "model": "CogVideoX 5B",
      "model_id": "zai-org/CogVideoX-5b",
      "task": "text-to-video",
      "status": "completed",
      "latency_seconds": 714.53,
      "resolution": "720x480",
      "frames": 49,
      "fps": 8,
      "steps": 32,
      "guidance_scale": 6,
      "visual_rating_10": 4.0,
      "prompt_match_10": 3.5,
      "quality_rating_10": 5.0,
      "verdict": "Best completed clip: it suggests a cracked dome/cathedral mood, but remains mostly environmental and does not execute the prompt sequence.",
      "matched_prompt_elements": [
        "cathedral/dome silhouette",
        "high-contrast horror palette",
        "wet metallic atmosphere"
      ],
      "missed_prompt_elements": [
        "visible cyborg brain",
        "gold neural ripple",
        "child/doll beat",
        "soldier control beat",
        "eye reflection"
      ],
      "failure_class": "",
      "error_excerpt": ""
    },
    {
      "model": "Mochi 1 Preview",
      "model_id": "genmo/mochi-1-preview",
      "task": "text-to-video",
      "status": "failed",
      "latency_seconds": 20.73,
      "resolution": "848x480",
      "frames": 31,
      "fps": 8,
      "steps": 28,
      "guidance_scale": 4.5,
      "visual_rating_10": null,
      "prompt_match_10": null,
      "quality_rating_10": null,
      "verdict": "CUDA out of memory during model load/generation.",
      "matched_prompt_elements": [],
      "missed_prompt_elements": [],
      "failure_class": "CUDA out of memory during model load/generation.",
      "error_excerpt": "[perknation@192.168.12.251] {\"ok\": false, \"error\": \"CUDA error: out of memory\\nSearch for `cudaErrorMemoryAllocation' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.\\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1\\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\"}"
    },
    {
      "model": "SkyReels V2 DF 14B 540P",
      "model_id": "Skywork/SkyReels-V2-DF-14B-540P-Diffusers",
      "task": "text-to-video",
      "status": "failed",
      "latency_seconds": 7200.01,
      "resolution": "960x544",
      "frames": 49,
      "fps": 16,
      "steps": 24,
      "guidance_scale": 6,
      "visual_rating_10": null,
      "prompt_match_10": null,
      "quality_rating_10": null,
      "verdict": "Exceeded the 7,200,000 ms HF video runtime timeout at 960x544, 49 frames.",
      "matched_prompt_elements": [],
      "missed_prompt_elements": [],
      "failure_class": "Exceeded the 7,200,000 ms HF video runtime timeout at 960x544, 49 frames.",
      "error_excerpt": "[perknation@192.168.12.251] HF video runtime timed out after 7200000ms"
    },
    {
      "model": "LTX Video 13B Distilled",
      "model_id": "Lightricks/LTX-Video-0.9.8-13B-distilled",
      "task": "image-to-video",
      "status": "failed",
      "latency_seconds": 48.88,
      "resolution": "704x512",
      "frames": 65,
      "fps": 12,
      "steps": 18,
      "guidance_scale": 3,
      "visual_rating_10": null,
      "prompt_match_10": null,
      "quality_rating_10": null,
      "verdict": "CUDA out of memory despite supplied source keyframe.",
      "matched_prompt_elements": [],
      "missed_prompt_elements": [],
      "failure_class": "CUDA out of memory despite supplied source keyframe.",
      "error_excerpt": "[perknation@192.168.12.251] {\"ok\": false, \"error\": \"CUDA error: out of memory\\nSearch for `cudaErrorMemoryAllocation' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.\\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1\\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. Hint: This model needs a source image. P"
    },
    {
      "model": "HunyuanVideo I2V",
      "model_id": "hunyuanvideo-community/HunyuanVideo-I2V",
      "task": "image-to-video",
      "status": "failed",
      "latency_seconds": 300.56,
      "resolution": "960x544",
      "frames": 49,
      "fps": 16,
      "steps": 18,
      "guidance_scale": 1,
      "visual_rating_10": null,
      "prompt_match_10": null,
      "quality_rating_10": null,
      "verdict": "Runtime tensor/indexing failure after source-image handoff; likely adapter/source conditioning incompatibility.",
      "matched_prompt_elements": [],
      "missed_prompt_elements": [],
      "failure_class": "Runtime tensor/indexing failure after source-image handoff; likely adapter/source conditioning incompatibility.",
      "error_excerpt": "[perknation@192.168.12.251] {\"ok\": false, \"error\": \"index -1 is out of bounds for dimension 1 with size 0 Hint: This model needs a source image. Provide inputImageBase64 or inputAssetId from the public page. HunyuanVideo-I2V is heavy. If this fails, reduce resolution or keep only one active video job at a time.\"}"
    },
    {
      "model": "SkyReels V1 Hunyuan I2V",
      "model_id": "Skywork/SkyReels-V1-Hunyuan-I2V",
      "task": "image-to-video",
      "status": "failed",
      "latency_seconds": 5.75,
      "resolution": "960x544",
      "frames": 49,
      "fps": 16,
      "steps": 22,
      "guidance_scale": 1,
      "visual_rating_10": null,
      "prompt_match_10": null,
      "quality_rating_10": null,
      "verdict": "Repository layout mismatch: missing model_index.json for the current diffusers loader path.",
      "matched_prompt_elements": [],
      "missed_prompt_elements": [],
      "failure_class": "Repository layout mismatch: missing model_index.json for the current diffusers loader path.",
      "error_excerpt": "[perknation@192.168.12.251] {\"ok\": false, \"error\": \"404 Client Error. (Request ID: Root=1-69fe94b5-6cc0a0a4608f58ed4b9d7af7;36e4dfcd-3711-4f12-ab74-86886dfd284d)\\n\\nEntry Not Found for url: https://huggingface.co/Skywork/SkyReels-V1-Hunyuan-I2V/resolve/main/model_index.json. Hint: This model needs a source image. Provide inputImageBase64 or inputAssetId from the public page.\"}"
    },
    {
      "model": "SkyReels V2 I2V 14B 540P",
      "model_id": "Skywork/SkyReels-V2-I2V-14B-540P-Diffusers",
      "task": "image-to-video",
      "status": "failed",
      "latency_seconds": 607.73,
      "resolution": "960x544",
      "frames": 49,
      "fps": 16,
      "steps": 24,
      "guidance_scale": 5,
      "visual_rating_10": null,
      "prompt_match_10": null,
      "quality_rating_10": null,
      "verdict": "Tensor channel mismatch in image-conditioning path; model adapter does not match runtime input shape.",
      "matched_prompt_elements": [],
      "missed_prompt_elements": [],
      "failure_class": "Tensor channel mismatch in image-conditioning path; model adapter does not match runtime input shape.",
      "error_excerpt": "[perknation@192.168.12.251] {\"ok\": false, \"error\": \"Given groups=1, weight of size [5120, 36, 1, 2, 2], expected input[1, 16, 25, 68, 120] to have 36 channels, but got 16 channels instead Hint: This model needs a source image. Provide inputImageBase64 or inputAssetId from the public page.\"}"
    },
    {
      "model": "Stable Video Diffusion XT",
      "model_id": "stabilityai/stable-video-diffusion-img2vid-xt",
      "task": "image-to-video",
      "status": "failed",
      "latency_seconds": 7.71,
      "resolution": "1024x576",
      "frames": 25,
      "fps": 7,
      "steps": 18,
      "guidance_scale": 2.5,
      "visual_rating_10": null,
      "prompt_match_10": null,
      "quality_rating_10": null,
      "verdict": "Runtime API mismatch: guidance_scale was passed to a pipeline that does not accept it.",
      "matched_prompt_elements": [],
      "missed_prompt_elements": [],
      "failure_class": "Runtime API mismatch: guidance_scale was passed to a pipeline that does not accept it.",
      "error_excerpt": "[perknation@192.168.12.251] {\"ok\": false, \"error\": \"StableVideoDiffusionPipeline.__call__() got an unexpected keyword argument 'guidance_scale' Hint: This model needs a source image. Provide inputImageBase64 or inputAssetId from the public page. Stable Video Diffusion XT is image-to-video only. Use a clear source frame and expect short clips.\"}"
    }
  ],
  "recommendations": [
    "Add low-memory presets for Mochi, LTX, and 14B SkyReels before public default use.",
    "Exclude unsupported kwargs per pipeline, especially Stable Video Diffusion guidance_scale.",
    "Split complex cinematic prompts into keyframe-first workflows with explicit source frames for each major beat.",
    "Fix image-conditioning adapters for Hunyuan/SkyReels I2V before judging creative quality.",
    "After SSH timeouts, explicitly terminate the remote process group to prevent stale GPU jobs."
  ]
}
