---
title: Metric utils
keywords: fastai
sidebar: home_sidebar
summary: "Model evaluation utilities."
description: "Model evaluation utilities."
nb_path: "nbs/metrics/utils.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

calculate_precision_recall[source]

calculate_precision_recall(X, y_true, y_pred, N, threshold)

Calculate the precision and recall scores.

Args:

- `X`: array of `(user, item)` id pairs aligned with the predictions
- `y_true`: ground-truth ratings
- `y_pred`: predicted ratings
- `N`: number of top recommendations to evaluate
- `threshold`: minimum rating for an item to count as relevant

Returns:

- `precision_score` (float)
- `recall_score` (float)
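A quick usage sketch (the toy ids and ratings below are invented for illustration; `X` carries `(user, item)` pairs, as in the unit test further down):

```python
import numpy as np

# Invented toy data: two users, two scored items each.
ids = np.array([[1, 10], [1, 11],
                [2, 10], [2, 12]])        # (user, item) pairs
y_true = np.array([4.0, 2.0, 5.0, 1.0])  # observed ratings
y_pred = np.array([3.9, 2.5, 4.8, 1.2])  # model scores

# Top-2 recommendations per user, counting ratings >= 3 as relevant.
precision, recall = calculate_precision_recall(ids, y_true, y_pred, 2, 3)
```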

{% endraw %} {% raw %}
{% endraw %} {% raw %}

calculate_ndcg[source]

calculate_ndcg(X, y_true, y_pred, N)

Calculate the NDCG score.

Args:

- `X`: array of `(user, item)` id pairs aligned with the predictions
- `y_true`: ground-truth ratings
- `y_pred`: predicted ratings
- `N`: number of top recommendations to evaluate

Returns:

- `ndcg_score` (float)
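A usage sketch with the same arrays as `sample_data_2` in the Testing section below; per `testNDCG`, this evaluates to roughly 0.9686:

```python
import numpy as np

ids = np.array([[1, 2], [1, 3], [2, 4], [2, 5], [3, 2]])  # (user, item) pairs
y_true = np.array([1, 2, 3, 4, 5])
y_pred = np.array([1, 3, 3, 2, 4])

score = calculate_ndcg(ids, y_true, y_pred, 2)  # ~0.9686, see testNDCG below
```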

{% endraw %} {% raw %}
{% endraw %} {% raw %}

recall[source]

recall(scores, labels, k)
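No docstring is exported for `recall`. Judging from the `(scores, labels, k)` signature it is a batch Recall@k over a score matrix and a binary relevance matrix; the sketch below shows one common formulation of that metric and is an assumption, not the library's exact code:

```python
import torch

def recall_at_k_sketch(scores, labels, k):
    # scores: (batch, n_items) model scores; labels: (batch, n_items) 0/1 relevance.
    _, topk = scores.topk(k, dim=1)        # indices of the k highest-scored items
    hits = labels.gather(1, topk).float()  # relevance of those top-k items
    # Per row: relevant items retrieved in the top k / total relevant items.
    return (hits.sum(1) / labels.sum(1).clamp(min=1).float()).mean().item()
```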

{% endraw %} {% raw %}
{% endraw %} {% raw %}

ndcg[source]

ndcg(scores, labels, k)
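`ndcg` likewise ships without a docstring. Assuming the same `(batch, n_items)` layout with binary labels, a minimal NDCG@k sketch looks like this (again an assumption about the formulation, not the exported code):

```python
import torch

def ndcg_at_k_sketch(scores, labels, k):
    _, topk = scores.topk(k, dim=1)
    hits = labels.gather(1, topk).float()  # relevance of the top-k items
    discounts = 1.0 / torch.log2(torch.arange(2, k + 2).float())  # 1/log2(rank+1)
    dcg = (hits * discounts).sum(1)
    # Ideal DCG: all relevant items (capped at k) occupy the top ranks.
    idcg = torch.stack([discounts[:n].sum()
                        for n in labels.sum(1).clamp(max=k).long().tolist()])
    return (dcg / idcg.clamp(min=1e-8)).mean().item()
```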

{% endraw %} {% raw %}
{% endraw %} {% raw %}

recalls_and_ndcgs_for_ks[source]

recalls_and_ndcgs_for_ks(scores, labels, ks)
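`recalls_and_ndcgs_for_ks` appears to be a convenience wrapper that evaluates both metrics at several cutoffs in one pass. A hedged usage sketch, assuming torch tensors as inputs; the metric-name-to-value dict shown in the comment is an assumed return format, not documented here:

```python
import torch

# Hypothetical batch: 4 users scored over 10 candidate items.
scores = torch.rand(4, 10)                  # model scores
labels = (torch.rand(4, 10) > 0.7).float()  # binary relevance

metrics = recalls_and_ndcgs_for_ks(scores, labels, [1, 5, 10])
# Assumed result shape: e.g. {'Recall@5': ..., 'NDCG@5': ...}
```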

{% endraw %} {% raw %}
{% endraw %}

Testing

Sample data

{% raw %}
import pandas as pd

sample_data = pd.DataFrame.from_records([
    {'deepmf': 2.7268011569976807, 'item': 496, 'ncf': 2.854853630065918,
     'user': 68, 'vdeepmf': 2.4722371101379395, 'vncf': 2.620150566101074,
     'y_test': 3.0},
    {'deepmf': 3.3491923809051514, 'item': 473, 'ncf': 3.0023105144500732,
     'user': 633, 'vdeepmf': 2.847424030303955, 'vncf': 2.6900570392608643,
     'y_test': 3.5},
    {'deepmf': 3.7268624305725098, 'item': 329, 'ncf': 3.605560779571533,
     'user': 1405, 'vdeepmf': 3.810497283935547, 'vncf': 3.466035842895508,
     'y_test': 4.0},
    {'deepmf': 3.4670088291168213, 'item': 328, 'ncf': 3.389759063720703,
     'user': 1240, 'vdeepmf': 3.6399013996124268, 'vncf': 3.205043315887451,
     'y_test': 0.5},
    {'deepmf': 3.140076160430908, 'item': 54, 'ncf': 3.1944096088409424,
     'user': 841, 'vdeepmf': 2.887760877609253, 'vncf': 2.848487138748169,
     'y_test': 3.0},
])
sample_data
|   | deepmf   | item | ncf      | user | vdeepmf  | vncf     | y_test |
|---|----------|------|----------|------|----------|----------|--------|
| 0 | 2.726801 | 496  | 2.854854 | 68   | 2.472237 | 2.620151 | 3.0    |
| 1 | 3.349192 | 473  | 3.002311 | 633  | 2.847424 | 2.690057 | 3.5    |
| 2 | 3.726862 | 329  | 3.605561 | 1405 | 3.810497 | 3.466036 | 4.0    |
| 3 | 3.467009 | 328  | 3.389759 | 1240 | 3.639901 | 3.205043 | 0.5    |
| 4 | 3.140076 | 54   | 3.194410 | 841  | 2.887761 | 2.848487 | 3.0    |
{% endraw %} {% raw %}
import numpy as np

sample_data_2 = {
    'y_true': np.array([1, 2, 3, 4, 5]),
    'y_pred': np.array([1, 3, 3, 2, 4]),
    'ids': np.array([[1, 2], [1, 3], [2, 4], [2, 5], [3, 2]]),
}
{% endraw %}

Unittest

{% raw %}
import unittest
{% endraw %} {% raw %}
class TestMetricUtils(unittest.TestCase):
    def setUp(self):
        self.sample_data = sample_data
        self.sample_data_2 = sample_data_2
        self.method = 'ncf'
        self.like_threshold = 3
            
    def testPrecisionRecall(self):
        num_recommendations = 2
        ids = self.sample_data[['user', 'item']].to_numpy()
        y_true = self.sample_data['y_test'].to_numpy()
        y_pred = self.sample_data[self.method].to_numpy()
        precision, recall = calculate_precision_recall(ids, y_true, y_pred, num_recommendations, self.like_threshold)
        self.assertEqual(precision, 0.75)
        self.assertEqual(recall, 0.75)

    def testNDCG(self):
        num_recommendations = 2
        ids = self.sample_data_2['ids']
        y_true = self.sample_data_2['y_true']
        y_pred = self.sample_data_2['y_pred']
        ndcg = calculate_ndcg(ids, y_true, y_pred, num_recommendations)
        self.assertAlmostEqual(ndcg, 0.9686, 3)
{% endraw %} {% raw %}
unittest.main(argv=[''], verbosity=2, exit=False)
testNDCG (__main__.TestMetricUtils) ... ok
testPrecisionRecall (__main__.TestMetricUtils) ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.017s

OK
<unittest.main.TestProgram at 0x7f1413e21690>
{% endraw %}