Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions configs/config-solar-pro2-250909-high-reasoning.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Evaluation config for solar-pro2 with reasoning_effort "high".
wandb:
  run_name: 'upstage/solar-pro2-250909-high-reasoning'
  project: 'nejumi-leaderboard-4-upstage'

api: upstage
batch_size: 32
# Run lightweight test with small number of questions
# NOTE(review): presumably this should be false for a full run - confirm.
testmode: true
# 100ms delay between API calls to prevent rate limiting
inference_interval: 0.1

model:
  pretrained_model_name_or_path: solar-pro2
  bfcl_model_id: "solar-pro2"
  base_model: "unknown"
  size_category: api
  size: null
  # Quoted so the value is always read as a string.
  # NOTE(review): day/month order cannot be told from this value - confirm.
  release_date: '09/09/2025'

generator:
  reasoning_effort: "high"

# BFCL-specific generator configuration (used by UpstageHandler)
bfcl:
  generator_config:
    reasoning_effort: "high"
44 changes: 44 additions & 0 deletions configs/config-solar-pro2-250909-minimal-reasoning.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Evaluation config for solar-pro2 with reasoning_effort "minimal".
wandb:
  run_name: 'upstage/solar-pro2-250909-minimal-reasoning'
  project: 'nejumi-leaderboard-4-upstage'

api: upstage
batch_size: 32
# Run lightweight test with small number of questions
# NOTE(review): presumably this should be false for a full run - confirm.
testmode: true
# 100ms delay between API calls to prevent rate limiting
inference_interval: 0.1

# Override run configuration: only swebench, mtbench and toxicity are
# enabled here; every other benchmark and the aggregate step are disabled.
# REMOVE this override once test is done
run:
  bfcl: false
  swebench: true
  mtbench: true
  jbbq: false
  toxicity: true
  jtruthfulqa: false
  hle: false
  hallulens: false
  arc_agi: false
  m_ifeval: false
  jaster: false
  jmmlu_robustness: false
  aggregate: false

model:
  pretrained_model_name_or_path: solar-pro2
  bfcl_model_id: "solar-pro2"
  base_model: "unknown"
  size_category: api
  size: null
  # Quoted so the value is always read as a string.
  # NOTE(review): day/month order cannot be told from this value - confirm.
  release_date: '09/09/2025'

generator:
  reasoning_effort: "minimal"

# BFCL-specific generator configuration (used by UpstageHandler)
bfcl:
  generator_config:
    reasoning_effort: "minimal"

swebench:
  evaluation_method: 'docker'
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ These unified handlers eliminate the need to configure individual model-specific
| xLAM-2-3b-fc-r | Function Calling | Self-hosted 💻 | Salesforce/xLAM-2-3b-fc-r |
| xLAM-2-70b-fc-r | Function Calling | Self-hosted 💻 | Salesforce/Llama-xLAM-2-70b-fc-r |
| xLAM-2-8b-fc-r | Function Calling | Self-hosted 💻 | Salesforce/Llama-xLAM-2-8b-fc-r |
| Upstage (Generic Handler) | Function Calling | Upstage | upstage-FC |
| Upstage (Generic Handler) | Prompt | Upstage | solar-pro2 |
| PLaMo-2.0-Prime | Function Calling | Preferred AI | PLaMo-2.0-Prime-FC |
| PLaMo-2.0-Prime | Prompt | Preferred AI | PLaMo-2.0-Prime |

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,18 @@ class ModelConfig:
is_fc_model=False,
underscore_to_dot=False,
),
"solar-pro2": ModelConfig(
model_name="solar-pro2",
display_name="solar-pro2 (Prompt)",
url="https://console.upstage.ai/api/chat",
org="Upstage",
license="Proprietary",
model_handler=UpstageHandler,
input_price=None, # Add pricing information if available
output_price=None,
is_fc_model=False,
underscore_to_dot=False,
),
}

# Inference through local hosting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
"qwq-32b-FC",
"qwq-32b",
"xiaoming-14B",
"upstage-FC",
"solar-pro2",
"deepseek-ai/DeepSeek-R1",
"google/gemma-3-1b-it",
"google/gemma-3-4b-it",
Expand Down
Loading