{#
  Report header and overview figures.

  Reads three values from the `stats` context variable:
    - stats.total_pairs          printed as-is
    - stats.final_pairs          printed as-is
    - stats.reduction_percentage formatted with two decimals ("%.2f") and
                                 followed by a literal "%" sign

  NOTE(review): reduction_percentage is presumably already scaled to 0-100,
  since the template does no multiplication itself; confirm against the caller.
-#}
MT-Cleaner Report

Text Cleaning Statistics and Analysis

Overview

Total Pairs

{{ stats.total_pairs }}

Final Pairs

{{ stats.final_pairs }}

Reduction

{{ "%.2f"|format(stats.reduction_percentage) }}%

{#
  Chart sections. `filtering_chart` and `length_plot` are emitted through the
  `safe` filter, which marks them as safe markup so they are not escaped when
  autoescaping is enabled.

  NOTE(review): presumably both are pre-rendered chart markup built by the
  report generator; because escaping is bypassed they must never carry
  untrusted text. Confirm against the caller.
-#}
Filtering Results

{{ filtering_chart | safe }}

Length Distribution

{{ length_plot | safe }}
{% if stats.quality_stats %}

Quality Statistics

{# One "metric value" row per entry of the stats.quality_stats mapping.
   The row has to live inside the for-loop: `key` and `value` exist only in the
   loop body, so a row placed after endfor would render empty (or raise under
   StrictUndefined). Numeric values are shown with three decimals; any other
   value is printed unchanged. The whole section is skipped when
   stats.quality_stats is empty or missing. #}
Metric Value
{% for key, value in stats.quality_stats.items() %}
{{ key }} {{ "%.3f"|format(value) if value is number else value }}
{% endfor %}
{% endif %}

Sample Filtered Pairs

{# Samples of pairs removed by each filter, one sub-section per filter.

   Every sub-section follows the same pattern: look up a list under
   stats.filtered_samples, skip the sub-section when the list is empty or
   missing, otherwise print a heading followed by a Source/Target line pair for
   each entry. The pattern is therefore written once and driven by the
   (lookup key, heading) table below; the table order is the display order.

   Each entry of a sample list is unpacked as a two-item (src, tgt) sequence.
   The lookup uses subscript syntax, which in Jinja falls back to attribute
   access, so stats.filtered_samples may be a mapping or an object. #}
{% set sample_sections = [
    ("empty_samples", "Empty After Cleaning"),
    ("too_short_samples", "Too Short"),
    ("too_long_samples", "Too Long"),
    ("word_count_samples", "Word Count Filtered"),
    ("length_outliers_samples", "Length Outliers"),
    ("domain_outliers_samples", "Domain Outliers"),
    ("quality_filtered_samples", "Quality Filtered")
] %}
{% for sample_key, section_title in sample_sections %}
{% set pairs = stats.filtered_samples[sample_key] %}
{% if pairs %}

{{ section_title }}

{% for src, tgt in pairs %}
Source: {{ src }}
Target: {{ tgt }}
{% endfor %}
{% endif %}
{% endfor %}