/**
 * Registry of the evaluation frameworks that can be referenced from the `eval.yaml` file of a benchmark dataset.
 *
 * The object is keyed by framework identifier. Every entry carries three fields:
 * - `name`: the framework identifier, identical to the entry's own key;
 * - `description`: a one-sentence, human-readable summary of the framework;
 * - `url`: a link to the framework's homepage or source repository.
 *
 * All properties are `readonly` and typed as string literals, so the supported identifiers can be derived
 * from the declaration itself (e.g. `keyof typeof EVALUATION_FRAMEWORKS`).
 *
 * NOTE(review): this looks like a compiler-emitted declaration (see the `sourceMappingURL` trailer), so the
 * literal types here presumably mirror the runtime values in the implementation module - change them there,
 * not in this file. Confirm against the build setup.
 */
export declare const EVALUATION_FRAMEWORKS: {
    readonly "inspect-ai": {
        readonly name: "inspect-ai";
        readonly description: "Inspect AI is an open-source framework for large language model evaluations.";
        readonly url: "https://inspect.aisi.org.uk/";
    };
    readonly "math-arena": {
        readonly name: "math-arena";
        readonly description: "MathArena is a platform for evaluation of LLMs on latest math competitions and olympiads.";
        readonly url: "https://github.com/eth-sri/matharena";
    };
    readonly mteb: {
        readonly name: "mteb";
        readonly description: "Multimodal toolbox for evaluating embeddings and retrieval systems.";
        readonly url: "https://github.com/embeddings-benchmark/mteb";
    };
    readonly "olmocr-bench": {
        readonly name: "olmocr-bench";
        readonly description: "olmOCR-Bench is a framework for evaluating document-level OCR of various tools.";
        readonly url: "https://github.com/allenai/olmocr/tree/main/olmocr/bench";
    };
    readonly harbor: {
        readonly name: "harbor";
        readonly description: "Harbor is a framework for evaluating and optimizing agents and language models.";
        readonly url: "https://github.com/laude-institute/harbor";
    };
    readonly archipelago: {
        readonly name: "archipelago";
        readonly description: "Archipelago is a system for running and evaluating AI agents against MCP applications.";
        readonly url: "https://github.com/Mercor-Intelligence/archipelago";
    };
    readonly "swe-bench": {
        readonly name: "swe-bench";
        readonly description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.";
        readonly url: "https://github.com/swe-bench/swe-bench";
    };
    readonly "swe-bench-pro": {
        readonly name: "swe-bench-pro";
        readonly description: "SWE-Bench Pro is a challenging benchmark evaluating LLMs/Agents on long-horizon software engineering tasks.";
        readonly url: "https://github.com/scaleapi/SWE-bench_Pro-os";
    };
    readonly "nemo-evaluator": {
        readonly name: "nemo-evaluator";
        readonly description: "NeMo Evaluator is an open-source platform for robust, reproducible, and scalable evaluation of Large Language Models across 100+ benchmarks.";
        readonly url: "https://github.com/NVIDIA-NeMo/Evaluator";
    };
};
//# sourceMappingURL=eval.d.ts.map