{
  "title": "Difficulty success heatmap",
  "description": "Average success rate by task difficulty and model.",
  "data": [
    {
      "type": "heatmap",
      "x": [
        "opencode/gpt-5.4-nano",
        "opencode/kimi-k2.5",
        "opencode/claude-opus-4-6",
        "opencode/glm-5",
        "opencode/big-pickle",
        "opencode/gpt-5.4",
        "opencode/claude-sonnet-4-6",
        "opencode/glm-5.1",
        "opencode/minimax-m2.5",
        "opencode/gpt-5.4-mini",
        "opencode/minimax-m2.5-free",
        "opencode/gemini-3-flash",
        "opencode/gemini-3.1-pro",
        "opencode/nemotron-3-super-free"
      ],
      "y": [
        "control (3 tasks)",
        "expert (12 tasks)",
        "high (10 tasks)",
        "medium (2 tasks)"
      ],
      "z": [
        [
          1,
          0.6666666666666666,
          1,
          1,
          0.6666666666666666,
          1,
          1,
          0.3333333333333333,
          0.3333333333333333,
          1,
          0.3333333333333333,
          0,
          0,
          0
        ],
        [
          0.8333333333333334,
          1,
          0.9166666666666666,
          0.75,
          0.6666666666666666,
          0.6666666666666666,
          0.8333333333333334,
          0.75,
          0.5833333333333334,
          0.25,
          0.5,
          0.6666666666666666,
          0.25,
          0.16666666666666666
        ],
        [
          0.9,
          0.9,
          0.9,
          0.7,
          0.7,
          0.9,
          0.7,
          0.7,
          0.6,
          0.6,
          0.8,
          0.7,
          0.7,
          0.5
        ],
        [
          0.5,
          0.5,
          0.5,
          1,
          0.5,
          0.5,
          0.5,
          0.5,
          0.5,
          0.5,
          0.5,
          0.5,
          0,
          0
        ]
      ],
      "text": [
        [
          "100%",
          "67%",
          "100%",
          "100%",
          "67%",
          "100%",
          "100%",
          "33%",
          "33%",
          "100%",
          "33%",
          "0%",
          "0%",
          "0%"
        ],
        [
          "83%",
          "100%",
          "92%",
          "75%",
          "67%",
          "67%",
          "83%",
          "75%",
          "58%",
          "25%",
          "50%",
          "67%",
          "25%",
          "17%"
        ],
        [
          "90%",
          "90%",
          "90%",
          "70%",
          "70%",
          "90%",
          "70%",
          "70%",
          "60%",
          "60%",
          "80%",
          "70%",
          "70%",
          "50%"
        ],
        [
          "50%",
          "50%",
          "50%",
          "100%",
          "50%",
          "50%",
          "50%",
          "50%",
          "50%",
          "50%",
          "50%",
          "50%",
          "0%",
          "0%"
        ]
      ],
      "customdata": [
        [
          "Avg success: 100%<br>Avg composite: 0.921<br>Avg requests: 10.33<br>Avg cost: $0.0103<br>Tasks in band: 3",
          "Avg success: 67%<br>Avg composite: 0.584<br>Avg requests: 6.33<br>Avg cost: $0.0143<br>Tasks in band: 3",
          "Avg success: 100%<br>Avg composite: 0.757<br>Avg requests: 9.00<br>Avg cost: $0.5938<br>Tasks in band: 3",
          "Avg success: 100%<br>Avg composite: 0.794<br>Avg requests: 10.00<br>Avg cost: $0.2064<br>Tasks in band: 3",
          "Avg success: 67%<br>Avg composite: 0.655<br>Avg requests: 8.33<br>Avg cost: $0.0000<br>Tasks in band: 3",
          "Avg success: 100%<br>Avg composite: 0.781<br>Avg requests: 8.00<br>Avg cost: $0.2523<br>Tasks in band: 3",
          "Avg success: 100%<br>Avg composite: 0.768<br>Avg requests: 9.67<br>Avg cost: $0.3594<br>Tasks in band: 3",
          "Avg success: 33%<br>Avg composite: 0.279<br>Avg requests: 7.33<br>Avg cost: $0.0150<br>Tasks in band: 3",
          "Avg success: 33%<br>Avg composite: 0.284<br>Avg requests: 11.67<br>Avg cost: $0.0121<br>Tasks in band: 3",
          "Avg success: 100%<br>Avg composite: 0.885<br>Avg requests: 8.00<br>Avg cost: $0.0317<br>Tasks in band: 3",
          "Avg success: 33%<br>Avg composite: 0.233<br>Avg requests: 10.33<br>Avg cost: $0.0000<br>Tasks in band: 3",
          "Avg success: 0%<br>Avg composite: 0.000<br>Avg requests: 11.00<br>Avg cost: $0.0000<br>Tasks in band: 3",
          "Avg success: 0%<br>Avg composite: 0.000<br>Avg requests: 7.67<br>Avg cost: $0.0000<br>Tasks in band: 3",
          "Avg success: 0%<br>Avg composite: 0.000<br>Avg requests: 6.33<br>Avg cost: $0.0000<br>Tasks in band: 3"
        ],
        [
          "Avg success: 83%<br>Avg composite: 0.780<br>Avg requests: 16.08<br>Avg cost: $0.0163<br>Tasks in band: 12",
          "Avg success: 100%<br>Avg composite: 0.877<br>Avg requests: 15.50<br>Avg cost: $0.0407<br>Tasks in band: 12",
          "Avg success: 92%<br>Avg composite: 0.692<br>Avg requests: 16.08<br>Avg cost: $0.8533<br>Tasks in band: 12",
          "Avg success: 75%<br>Avg composite: 0.597<br>Avg requests: 11.92<br>Avg cost: $0.2813<br>Tasks in band: 12",
          "Avg success: 67%<br>Avg composite: 0.612<br>Avg requests: 17.58<br>Avg cost: $0.0000<br>Tasks in band: 12",
          "Avg success: 67%<br>Avg composite: 0.523<br>Avg requests: 10.83<br>Avg cost: $0.3665<br>Tasks in band: 12",
          "Avg success: 83%<br>Avg composite: 0.636<br>Avg requests: 16.25<br>Avg cost: $0.4507<br>Tasks in band: 12",
          "Avg success: 75%<br>Avg composite: 0.619<br>Avg requests: 11.75<br>Avg cost: $0.0917<br>Tasks in band: 12",
          "Avg success: 58%<br>Avg composite: 0.503<br>Avg requests: 16.75<br>Avg cost: $0.0273<br>Tasks in band: 12",
          "Avg success: 25%<br>Avg composite: 0.223<br>Avg requests: 9.50<br>Avg cost: $0.0406<br>Tasks in band: 12",
          "Avg success: 50%<br>Avg composite: 0.350<br>Avg requests: 22.58<br>Avg cost: $0.0000<br>Tasks in band: 12",
          "Avg success: 67%<br>Avg composite: 0.467<br>Avg requests: 22.75<br>Avg cost: $0.1072<br>Tasks in band: 12",
          "Avg success: 25%<br>Avg composite: 0.188<br>Avg requests: 13.42<br>Avg cost: $0.3058<br>Tasks in band: 12",
          "Avg success: 17%<br>Avg composite: 0.117<br>Avg requests: 19.33<br>Avg cost: $0.0000<br>Tasks in band: 12"
        ],
        [
          "Avg success: 90%<br>Avg composite: 0.824<br>Avg requests: 15.00<br>Avg cost: $0.0154<br>Tasks in band: 10",
          "Avg success: 90%<br>Avg composite: 0.809<br>Avg requests: 14.20<br>Avg cost: $0.0312<br>Tasks in band: 10",
          "Avg success: 90%<br>Avg composite: 0.675<br>Avg requests: 17.30<br>Avg cost: $0.8357<br>Tasks in band: 10",
          "Avg success: 70%<br>Avg composite: 0.564<br>Avg requests: 8.30<br>Avg cost: $0.1886<br>Tasks in band: 10",
          "Avg success: 70%<br>Avg composite: 0.641<br>Avg requests: 12.80<br>Avg cost: $0.0000<br>Tasks in band: 10",
          "Avg success: 90%<br>Avg composite: 0.702<br>Avg requests: 10.60<br>Avg cost: $0.3253<br>Tasks in band: 10",
          "Avg success: 70%<br>Avg composite: 0.529<br>Avg requests: 15.10<br>Avg cost: $0.4479<br>Tasks in band: 10",
          "Avg success: 70%<br>Avg composite: 0.571<br>Avg requests: 10.30<br>Avg cost: $0.0586<br>Tasks in band: 10",
          "Avg success: 60%<br>Avg composite: 0.519<br>Avg requests: 14.50<br>Avg cost: $0.0224<br>Tasks in band: 10",
          "Avg success: 60%<br>Avg composite: 0.514<br>Avg requests: 10.90<br>Avg cost: $0.0421<br>Tasks in band: 10",
          "Avg success: 80%<br>Avg composite: 0.560<br>Avg requests: 15.00<br>Avg cost: $0.0000<br>Tasks in band: 10",
          "Avg success: 70%<br>Avg composite: 0.490<br>Avg requests: 18.30<br>Avg cost: $0.0989<br>Tasks in band: 10",
          "Avg success: 70%<br>Avg composite: 0.560<br>Avg requests: 10.30<br>Avg cost: $0.1898<br>Tasks in band: 10",
          "Avg success: 50%<br>Avg composite: 0.350<br>Avg requests: 20.80<br>Avg cost: $0.0000<br>Tasks in band: 10"
        ],
        [
          "Avg success: 50%<br>Avg composite: 0.476<br>Avg requests: 19.50<br>Avg cost: $0.0205<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.411<br>Avg requests: 14.00<br>Avg cost: $0.0346<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.388<br>Avg requests: 12.50<br>Avg cost: $0.7488<br>Tasks in band: 2",
          "Avg success: 100%<br>Avg composite: 0.820<br>Avg requests: 12.00<br>Avg cost: $0.2769<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.444<br>Avg requests: 25.00<br>Avg cost: $0.0000<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.395<br>Avg requests: 10.00<br>Avg cost: $0.2875<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.392<br>Avg requests: 14.00<br>Avg cost: $0.4379<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.399<br>Avg requests: 10.00<br>Avg cost: $0.0750<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.452<br>Avg requests: 18.00<br>Avg cost: $0.0267<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.500<br>Avg requests: 8.50<br>Avg cost: $0.0285<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.350<br>Avg requests: 11.50<br>Avg cost: $0.0000<br>Tasks in band: 2",
          "Avg success: 50%<br>Avg composite: 0.350<br>Avg requests: 9.50<br>Avg cost: $0.0774<br>Tasks in band: 2",
          "Avg success: 0%<br>Avg composite: 0.000<br>Avg requests: 10.00<br>Avg cost: $0.1430<br>Tasks in band: 2",
          "Avg success: 0%<br>Avg composite: 0.000<br>Avg requests: 21.50<br>Avg cost: $0.0000<br>Tasks in band: 2"
        ]
      ],
      "texttemplate": "%{text}",
      "textfont": {
        "size": 11,
        "color": "#edf2ff"
      },
      "colorscale": [
        [
          0,
          "#ef4444"
        ],
        [
          0.5,
          "#f59e0b"
        ],
        [
          1,
          "#22c55e"
        ]
      ],
      "zmin": 0,
      "zmax": 1,
      "xgap": 2,
      "ygap": 2,
      "hovertemplate": "%{x}<br>%{y}<br>%{customdata}<extra></extra>",
      "colorbar": {
        "tickformat": ".0%",
        "tickfont": {
          "color": "#99a4c8"
        }
      }
    }
  ],
  "layout": {
    "paper_bgcolor": "#08101f",
    "plot_bgcolor": "#0d1830",
    "font": {
      "family": "Inter, ui-sans-serif, system-ui, sans-serif",
      "color": "#edf2ff",
      "size": 13
    },
    "title": {
      "text": "Difficulty success heatmap"
    },
    "hoverlabel": {
      "bgcolor": "#0a1324",
      "bordercolor": "rgba(153, 164, 200, 0.22)",
      "font": {
        "color": "#edf2ff"
      }
    },
    "legend": {
      "font": {
        "color": "#99a4c8"
      },
      "orientation": "h",
      "x": 0,
      "y": 1.12
    },
    "margin": {
      "l": 180,
      "r": 24,
      "t": 54,
      "b": 90
    },
    "height": 428,
    "xaxis": {
      "automargin": true,
      "tickfont": {
        "size": 12,
        "color": "#99a4c8"
      },
      "title": {
        "font": {
          "color": "#edf2ff",
          "size": 13
        }
      },
      "gridcolor": "rgba(153, 164, 200, 0.16)",
      "linecolor": "rgba(153, 164, 200, 0.22)",
      "zerolinecolor": "rgba(153, 164, 200, 0.16)",
      "tickangle": -24
    },
    "yaxis": {
      "automargin": true,
      "zeroline": false,
      "tickfont": {
        "size": 12,
        "color": "#99a4c8"
      },
      "title": {
        "font": {
          "color": "#edf2ff",
          "size": 13
        }
      },
      "gridcolor": "rgba(153, 164, 200, 0.16)",
      "linecolor": "rgba(153, 164, 200, 0.22)",
      "categoryorder": "array",
      "categoryarray": [
        "control (3 tasks)",
        "medium (2 tasks)",
        "high (10 tasks)",
        "expert (12 tasks)"
      ],
      "autorange": "reversed"
    }
  }
}
