{# Benchmark report page for a single SUT (system under test).
   Context expected from the view:
     - benchmark_score: score object exposing .sut, .benchmark_definition, .end_time
     - benchmark_name: display name used in the page <title> block
     - page_type, view_embed, mlc_branding: presentation flags passed to macros
     - content(obj_or_section, key): lookup helper resolving display text
   NOTE(review): surrounding HTML tags are not visible in this chunk — only
   Jinja statements and text nodes; structure comments below describe the
   template flow, not the markup. #}
{% from "macros/breadcrumb.html" import breadcrumb %} {% from "macros/interpret_safety_ratings.html" import interpret_safety_ratings %} {% from "macros/sut_card.html" import sut_card %} {% from "macros/test_runs.html" import test_runs %} {% from "macros/use_hazards_limitations.html" import use_hazards_limitations %} {% extends "base.html" %} {% block title %}{{ benchmark_name }} Benchmark{% endblock %} {% block content %}
{# Breadcrumb navigation; mlc_branding toggles MLCommons-branded crumbs #}
{{ breadcrumb(benchmark_score, benchmark_score.benchmark_definition, page_type=page_type, view_embed=view_embed, mlc_branding=mlc_branding) }}

{# Report heading — branding prefix is optional; "v0.5" is hard-coded here #}
{% if mlc_branding %}MLCommons AI Safety {% endif %}v0.5 Benchmark Report

{# Subheading: "<SUT name> - <benchmark name>" via the content() helper #}
{{ content(benchmark_score.sut, "name") }} - {{ content(benchmark_score.benchmark_definition, "name") }}

{# Provisional-results disclaimer, hazards/limitations blurb, and rating legend #}
{% include "_provisional.html" %} {{ use_hazards_limitations(benchmark_score.benchmark_definition) }} {{ interpret_safety_ratings() }}

Overall Safety Rating

{{ content("general", "overall_safety_rating") }}

{# Card summarizing the SUT's overall grade #}
{{ sut_card(benchmark_score) }}

Hazard Scoring Details

{{ content("general", "tests_run") }}

{# Per-hazard test results table plus its legend #}
{{ test_runs(benchmark_score) }} {% include "_test_runs_legend.html" %}

{# Run metadata: label/value pairs rendered below #}
Run Details

Benchmark UID

{{ benchmark_score.benchmark_definition.uid }}

Benchmark Version

{{ content(benchmark_score.benchmark_definition, "version") }}

Last Run

{# end_time is formatted directly; %Z renders empty for naive datetimes — TODO confirm tz-aware #}
{{ benchmark_score.end_time.strftime('%Y-%m-%d %H:%M:%S %Z') }}

Model Display Name

{{ content(benchmark_score.sut, "name") }}

Model UID

{# Raw SUT key (machine identifier), intentionally not passed through content() #}
{{ benchmark_score.sut.key }}


{# Call-to-action shown only on MLCommons-branded pages; new_tests content
   contains markup, hence the |safe filter (trusted, site-authored content) #}
{% if mlc_branding %}

Don't see the tests you are looking for?

{{ content("general", "new_tests") | safe }}

{% endif %} {% endblock %}