What to measure
Three metrics matter in combinatorial optimization benchmarking: optimality gap (how far above the known best solution your result is), wall-clock time, and solution feasibility rate (what fraction of runs satisfy all constraints). Always report all three — a fast solver that ignores constraints is useless.
Optimality gap formula
def gap(found, optimal):
    """Percentage above optimal (lower is better). Never negative.

    Args:
        found: Objective value achieved by the solver.
        optimal: Known optimal objective value (must be non-zero;
            assumed positive for minimization instances).

    Returns:
        Relative gap in percent, clamped at 0.0. The clamp enforces the
        documented "never negative" contract: the bare formula would go
        negative whenever found < optimal (e.g. a stale reference value).
    """
    return max(0.0, (found - optimal) / optimal * 100)
# Example: NEROX found 427 on eil51, optimal is 426
print(f"Gap: {gap(427, 426):.2f}%")  # 0.24%

Running a reproducible benchmark
import nerox
import numpy as np
def benchmark_tsp(distance_matrix, known_optimal, n_trials=10, solver="gpu"):
"""Run multiple trials and report mean gap + std dev."""
client = nerox.Client()
results = []
for trial in range(n_trials):
job = client.optimize.tsp(
distance_matrix=distance_matrix,
solver=solver,
n_runs=512,
seed=trial, # fix seed per trial for reproducibility
)
r = job.wait(timeout=300)
gap_pct = (r.objective - known_optimal) / known_optimal * 100
results.append({
"trial": trial,
"objective": r.objective,
"gap_pct": gap_pct,
"runtime_s": r.runtime_s,
})
gaps = [r["gap_pct"] for r in results]
print(f"Mean gap: {np.mean(gaps):.3f}% ± {np.std(gaps):.3f}%")
print(f"Best gap: {min(gaps):.3f}%")
print(f"Mean runtime: {np.mean([r['runtime_s'] for r in results]):.1f}s")
return resultsStandard benchmark instances
NEROX ships TSPLIB instances in the Dataset Hub. Load any instance by ID — no manual file management needed.
import nerox
client = nerox.Client()
# List available benchmark datasets
datasets = client.datasets.list(tag="tsplib")
for d in datasets:
print(d.name, d.n_cities, d.known_optimal)
# Load a specific instance
eil51 = client.datasets.get("tsplib/eil51")
matrix = eil51.distance_matrix # numpy array
job = client.optimize.tsp(distance_matrix=matrix, solver="gpu")
result = job.wait()
print(f"Gap: {(result.objective - eil51.known_optimal) / eil51.known_optimal * 100:.3f}%")Published benchmark results
The table below shows median gap over 10 trials on TSPLIB instances. GPU Annealing uses 512 runs × 20,000 sweeps on a single A100. Hybrid Solver used for instances >2,000 cities.
Comparing solvers head-to-head
import nerox
import numpy as np
client = nerox.Client()
matrix = client.datasets.get("tsplib/pr1002").distance_matrix
solvers = ["gpu", "tabu", "hybrid"]
for solver in solvers:
jobs = [
client.optimize.tsp(distance_matrix=matrix, solver=solver, seed=i)
for i in range(5)
]
objectives = [j.wait().objective for j in jobs]
print(f"{solver:10s}: {np.mean(objectives):.0f} ± {np.std(objectives):.0f}")Using result.gap_to_best
When a known optimal is available in the Dataset Hub, the API automatically populates result.gap_to_best so you don't need to compute it manually. For custom instances where no optimal is known, this field is None.
