{
  "title": "Completion vs benchmark cost",
  "description": "Who clears the suite most completely for the least total benchmark spend.",
  "data": [
    {
      "type": "scatter",
      "mode": "markers+text",
      "x": [
        0.4215,
        0.9122,
        21.8757,
        6.4339,
        0,
        8.9827,
        11.8406,
        1.8816,
        0.6413,
        1.0606,
        0,
        2.4307,
        5.8536,
        0
      ],
      "y": [
        0.852,
        0.889,
        0.889,
        0.778,
        0.667,
        0.778,
        0.778,
        0.667,
        0.556,
        0.481,
        0.593,
        0.593,
        0.37,
        0.259
      ],
      "text": [
        "opencode/gpt-5.4-nano",
        "opencode/kimi-k2.5",
        "opencode/claude-opus-4-6",
        "opencode/glm-5",
        "opencode/big-pickle",
        "opencode/gpt-5.4",
        "opencode/claude-sonnet-4-6",
        "opencode/glm-5.1",
        "opencode/minimax-m2.5",
        "opencode/gpt-5.4-mini",
        "opencode/minimax-m2.5-free",
        "opencode/gemini-3-flash",
        "opencode/gemini-3.1-pro",
        "opencode/nemotron-3-super-free"
      ],
      "textposition": "top center",
      "textfont": {
        "color": "#edf2ff",
        "size": 12
      },
      "marker": {
        "size": [
          30,
          30,
          28,
          27,
          27,
          27,
          27,
          26,
          25,
          24,
          23,
          23,
          21,
          19
        ],
        "color": [
          "#e879f9",
          "#a78bfa",
          "#fb7185",
          "#4ade80",
          "#38bdf8",
          "#a78bfa",
          "#fb7185",
          "#f472b6",
          "#e879f9",
          "#f472b6",
          "#84cc16",
          "#f97316",
          "#f59e0b",
          "#f472b6"
        ],
        "line": {
          "color": "rgba(255,255,255,0.22)",
          "width": 1.5
        }
      },
      "customdata": [
        [
          "0.789",
          "15.17",
          "413",
          "27m 33s"
        ],
        [
          "0.785",
          "14.25",
          "375",
          "41m 05s"
        ],
        [
          "0.67",
          "14.88",
          "418",
          "40m 04s"
        ],
        [
          "0.623",
          "11.57",
          "280",
          "20m 10s"
        ],
        [
          "0.615",
          "15.39",
          "414",
          "36m 28s"
        ],
        [
          "0.609",
          "11.00",
          "280",
          "32m 47s"
        ],
        [
          "0.593",
          "16.43",
          "403",
          "42m 31s"
        ],
        [
          "0.547",
          "12.06",
          "286",
          "64m 39s"
        ],
        [
          "0.481",
          "18.87",
          "417",
          "32m 15s"
        ],
        [
          "0.425",
          "9.54",
          "264",
          "21m 48s"
        ],
        [
          "0.415",
          "16.19",
          "475",
          "41m 34s"
        ],
        [
          "0.415",
          "21.81",
          "508",
          "62m 52s"
        ],
        [
          "0.291",
          "12.70",
          "307",
          "51m 25s"
        ],
        [
          "0.181",
          "19.43",
          "502",
          "109m 00s"
        ]
      ],
      "hovertemplate": "%{text}<br>Total cost: $%{x:.4f}<br>Completion: %{y:.3f}<br>Composite: %{customdata[0]}<br>ORPT: %{customdata[1]}<br>Requests: %{customdata[2]}<br>Wall time: %{customdata[3]}<extra></extra>"
    }
  ],
  "layout": {
    "paper_bgcolor": "#08101f",
    "plot_bgcolor": "#0d1830",
    "font": {
      "family": "Inter, ui-sans-serif, system-ui, sans-serif",
      "color": "#edf2ff",
      "size": 13
    },
    "title": "Completion vs benchmark cost",
    "hoverlabel": {
      "bgcolor": "#0a1324",
      "bordercolor": "rgba(153, 164, 200, 0.22)",
      "font": {
        "color": "#edf2ff"
      }
    },
    "legend": {
      "font": {
        "color": "#99a4c8"
      },
      "orientation": "h",
      "x": 0,
      "y": 1.12
    },
    "margin": {
      "l": 72,
      "r": 24,
      "t": 72,
      "b": 82
    },
    "height": 520,
    "xaxis": {
      "automargin": true,
      "tickfont": {
        "size": 12,
        "color": "#99a4c8"
      },
      "titlefont": {
        "color": "#edf2ff",
        "size": 13
      },
      "gridcolor": "rgba(153, 164, 200, 0.16)",
      "linecolor": "rgba(153, 164, 200, 0.22)",
      "zerolinecolor": "rgba(153, 164, 200, 0.16)",
      "tickangle": -18,
      "title": "Total benchmark cost (USD)"
    },
    "yaxis": {
      "automargin": true,
      "zeroline": false,
      "tickfont": {
        "size": 12,
        "color": "#99a4c8"
      },
      "titlefont": {
        "color": "#edf2ff",
        "size": 13
      },
      "gridcolor": "rgba(153, 164, 200, 0.16)",
      "linecolor": "rgba(153, 164, 200, 0.22)",
      "title": "Completion score",
      "range": [
        0,
        1.05
      ]
    }
  }
}
