{
  "benchmark_id": "f5-tts-titan-20260415",
  "date": "2026-04-15",
  "session": 107,
  "branch": "cosyvoice2-bench-20260415",
  "target_model": "SWivid/F5-TTS v1.1.18",
  "target_model_release": "2026-03-24",
  "verdict": "rejected_weight_license_cc_by_nc_4_0",
  "benchmark_run": false,
  "blocker": {
    "type": "license",
    "detail": "Pretrained weights are CC-BY-NC-4.0 (Creative Commons Attribution-NonCommercial 4.0). Training dataset Emilia is in-the-wild and non-commercial, which contaminates weight license regardless of MIT code license.",
    "primary_source_1": {
      "url": "https://github.com/SWivid/F5-TTS",
      "quote": "The pre-trained models are licensed under the CC-BY-NC license due to the training data Emilia, which is an in-the-wild dataset."
    },
    "primary_source_2": {
      "url": "https://huggingface.co/SWivid/F5-TTS",
      "license_tag": "cc-by-nc-4.0"
    },
    "fixable_upstream": false,
    "fix_would_require": "Retraining F5-TTS weights from scratch on permissive-license dataset. No fork has done this."
  },
  "gate_failed": "license (her-os targets personal + commercial distribution; CC-BY-NC blocks)",
  "other_positive_findings": {
    "streaming_inference_supported": true,
    "streaming_reference": "Release v1.1.18 notes separate streaming and non-streaming functions in utils_infer.py",
    "cli_signature_understood": "f5-tts_infer-cli --model F5TTS_v1_Base --ref_audio PATH --ref_text 'TRANSCRIPT' --gen_text 'TARGET'",
    "python_version": ">=3.10",
    "pretrained_checkpoints": ["F5TTS_v1_Base (model_1250000.safetensors)", "F5TTS_Base (model_1200000.safetensors)", "E2TTS_Base (model_1200000.safetensors)"],
    "install_methods_available": ["pip install f5-tts", "ghcr.io/swivid/f5-tts:main Docker", "source build (pip install -e .)"]
  },
  "aarch64_blackwell_status": "untested — license block resolved first; README mentions only CUDA 12.4/12.8 tested, no SM_121 or DGX Spark references",
  "time_spent_minutes": 20,
  "samantha_reference_status": "available at services/audio-pipeline/voice-references/samantha_movie_primary.wav (34.7s, 24 kHz mono). Remains valid for Chatterbox.",
  "chatterbox_gemma_impact": {
    "gemma_paused": false,
    "pre_flight_p50_ms": 190,
    "post_flight_p50_ms": "n/a (no TTS install attempted, Gemma never paused)",
    "drift_ratio": "n/a"
  },
  "decision_rule_applied": {
    "rule": "Plan §123: f5_mean ≥ chatterbox_mean + 0.5 AND f5_ttfa_p50_ms ≤ 500 → swap_candidate; else → try CosyVoice 2",
    "result": "Gate short-circuited by license block — no A/B score needed; proceed to CosyVoice 2 path"
  },
  "next_action": "CosyVoice 2 primary-source research (see BENCHMARK-COSYVOICE2-TITAN-20260415.json)"
}
