{
- "headers": [
- "Model",
- "Type",
- "SGI-Score",
- "Deep Research",
- "Idea Generation",
- "Dry Experiment",
- "Wet Experiment",
- "Experimental Reasoning"
- "data": [
- [
- "Gemini-3-Pro",
- "Closed",
- 33.83,
- 18.48,
- 39.68,
- 36.64,
- 32.45,
- 41.92
- [
- "Claude-Sonnet-4.5",
- "Closed",
- 32.16,
- 13.84,
- 43.2,
- 35.79,
- 30.15,
- 37.8
- [
- "Qwen3-Max",
- "Open",
- 31.97,
- 15.38,
- 39.83,
- 33.21,
- 33.62,
- 37.8
- [
- "GPT-4.1",
- "Closed",
- 31.45,
- 11.32,
- 36.49,
- 34.32,
- 36.63,
- 38.49
- [
- "GPT-5",
- "Closed",
- 30.84,
- 14.47,
- 55.4,
- 29.89,
- 16.31,
- 38.14
- [
- "o3",
- "Closed",
- 30.68,
- 12.89,
- 46.07,
- 31.73,
- 30.04,
- 32.65
- [
- "Claude-Opus-4.1",
- "Closed",
- 30.42,
- 12.93,
- 40.29,
- 34.69,
- 25.38,
- 38.83
- [
- "o4-mini",
- "Closed",
- 30.14,
- 11.95,
- 40.78,
- 35.79,
- 28.86,
- 33.33
- [
- "GPT-5.1",
- "Closed",
- 29.31,
- 11.64,
- 47.12,
- 31,
- 22.77,
- 34.02
- [
- "Grok-4",
- "Closed",
- 28.68,
- 13.31,
- 37.12,
- 33.71,
- 29.01,
- 30.24
- [
- "Qwen3-VL-235B-A22B",
- "Open",
- 28.32,
- 11.97,
- 39.28,
- 28.41,
- 30.3,
- 31.62
- [
- "Gemini-2.5-Pro",
- "Closed",
- 28.17,
- 15.09,
- 39.95,
- 22.51,
- 22.05,
- 41.24
- [
- "Intern-S1",
- "Open",
- 28.1,
- 15.74,
- 38.09,
- 28.79,
- 29.02,
- 28.87
- [
- "GPT-4o",
- "Closed",
- 26.87,
- 7.86,
- 35.95,
- 26.94,
- 31.31,
- 32.3
- [
- "Gemini-2.5-Flash",
- "Closed",
- 24.75,
- 10.69,
- 39.13,
- 21.03,
- 18.55,
- 34.36
- [
- "Llama-4-Scout",
- "Open",
- 21.08,
- 7.86,
- 29.72,
- 20.37,
- 21.66,
- 25.77
- [
- "Qwen3-8B",
- "Open",
- 19.15,
- 8.18,
- 35.78,
- 18.45,
- 9.96,
- 23.37
- [
- "Intern-S1-mini",
- "Open",
- 18.67,
- 11.06,
- 36.04,
- 16.97,
- 12.42,
- 16.84
- [
- "metadata": null