SGI-Bench Leaderboard 🏆

{
  • "headers": [
    • "Model",
    • "Type",
    • "SGI-Score",
    • "Deep Research",
    • "Idea Generation",
    • "Dry Experiment",
    • "Wet Experiment",
    • "Experimental Reasoning"
    ],
  • "data": [
    • [
      • "Gemini-3-Pro",
      • "Closed",
      • 33.83,
      • 18.48,
      • 39.68,
      • 36.64,
      • 32.45,
      • 41.92
      ],
    • [
      • "Claude-Sonnet-4.5",
      • "Closed",
      • 32.16,
      • 13.84,
      • 43.2,
      • 35.79,
      • 30.15,
      • 37.8
      ],
    • [
      • "Qwen3-Max",
      • "Open",
      • 31.97,
      • 15.38,
      • 39.83,
      • 33.21,
      • 33.62,
      • 37.8
      ],
    • [
      • "GPT-4.1",
      • "Closed",
      • 31.45,
      • 11.32,
      • 36.49,
      • 34.32,
      • 36.63,
      • 38.49
      ],
    • [
      • "GPT-5",
      • "Closed",
      • 30.84,
      • 14.47,
      • 55.4,
      • 29.89,
      • 16.31,
      • 38.14
      ],
    • [
      • "o3",
      • "Closed",
      • 30.68,
      • 12.89,
      • 46.07,
      • 31.73,
      • 30.04,
      • 32.65
      ],
    • [
      • "Claude-Opus-4.1",
      • "Closed",
      • 30.42,
      • 12.93,
      • 40.29,
      • 34.69,
      • 25.38,
      • 38.83
      ],
    • [
      • "o4-mini",
      • "Closed",
      • 30.14,
      • 11.95,
      • 40.78,
      • 35.79,
      • 28.86,
      • 33.33
      ],
    • [
      • "GPT-5.1",
      • "Closed",
      • 29.31,
      • 11.64,
      • 47.12,
      • 31,
      • 22.77,
      • 34.02
      ],
    • [
      • "Grok-4",
      • "Closed",
      • 28.68,
      • 13.31,
      • 37.12,
      • 33.71,
      • 29.01,
      • 30.24
      ],
    • [
      • "Qwen3-VL-235B-A22B",
      • "Open",
      • 28.32,
      • 11.97,
      • 39.28,
      • 28.41,
      • 30.3,
      • 31.62
      ],
    • [
      • "Gemini-2.5-Pro",
      • "Closed",
      • 28.17,
      • 15.09,
      • 39.95,
      • 22.51,
      • 22.05,
      • 41.24
      ],
    • [
      • "Intern-S1",
      • "Open",
      • 28.1,
      • 15.74,
      • 38.09,
      • 28.79,
      • 29.02,
      • 28.87
      ],
    • [
      • "GPT-4o",
      • "Closed",
      • 26.87,
      • 7.86,
      • 35.95,
      • 26.94,
      • 31.31,
      • 32.3
      ],
    • [
      • "Gemini-2.5-Flash",
      • "Closed",
      • 24.75,
      • 10.69,
      • 39.13,
      • 21.03,
      • 18.55,
      • 34.36
      ],
    • [
      • "Llama-4-Scout",
      • "Open",
      • 21.08,
      • 7.86,
      • 29.72,
      • 20.37,
      • 21.66,
      • 25.77
      ],
    • [
      • "Qwen3-8B",
      • "Open",
      • 19.15,
      • 8.18,
      • 35.78,
      • 18.45,
      • 9.96,
      • 23.37
      ],
    • [
      • "Intern-S1-mini",
      • "Open",
      • 18.67,
      • 11.06,
      • 36.04,
      • 16.97,
      • 12.42,
      • 16.84
      ]
    ],
  • "metadata": null
}