---
title: Common utils
keywords: fastai
sidebar: home_sidebar
summary: "A collection of utilities often used."
description: "A collection of utilities often used."
nb_path: "nbs/utils/utils.common_utils.ipynb"
---
# Usage examples for the utilities, written as notebook cells. Lines that
# start with `!` are IPython shell escapes, so this section only runs inside
# IPython/Jupyter, not as a plain Python script.

# Fetch the MovieLens-1M zip archive. The second argument is presumably the
# destination directory -- confirm against download_url's signature.
download_url('https://files.grouplens.org/datasets/movielens/ml-1m.zip',
'./data/bronze')
# Show the directory layout under ./data using the external `tree` command.
!tree ./data
# Same listing with sizes: --du accumulates directory sizes, -h prints them in
# human-readable units, and -C turns on colorized output.
!tree --du -h -C ./data
# Two example result records; the second carries an extra 'nDCG' key that the
# first one lacks.
results = [{'model':'MF', 'MRR':.35},
{'model':'NCF', 'MRR':.42, 'nDCG':.25}]
# Presumably renders the records as a table -- verify against the helper.
print_result_as_table(results)
# Presumably lists the files under ./sample_data -- verify against the helper.
list_files('./sample_data')
import unittest
from numpy.testing import assert_array_equal
class TestUtils(unittest.TestCase):
    """Checks for the data helpers: column mapping, context split and padding."""

    def testColMapping(self):
        """test the column mapping function"""
        frame = pd.DataFrame({'uid': [1, 2, 3, 4], 'sid': [1, 3, 5, 7]})
        # map_column returns three values; only the frame is inspected here.
        mapped_frame, _, _ = map_column(frame, col_name='sid')
        expected_ids = [2, 3, 4, 5]
        assert_array_equal(mapped_frame.sid_mapped.values, expected_ids)

    def testSplit(self):
        """test the train/test/val split"""
        SEED = 42
        n_rows = 50
        frame = pd.DataFrame(
            {
                'uid': list(np.arange(n_rows)),
                'sid': list(np.arange(n_rows)),
            }
        )
        train_context = get_context(
            frame, split='train', context_size=5, seed=SEED
        )
        # The window pinned here is the one produced for this exact seed.
        expected_sids = [12, 13, 14, 15, 16]
        assert_array_equal(train_context.sid.values, expected_sids)

    def testArrayPadding(self):
        """test array padding function"""
        two_rows = np.array([[1, 2, 3], [7, 8, 9]])
        one_row = np.array([[1, 2, 3]])
        padded_to_five = pad_arr(two_rows, expected_size=5)
        padded_to_three = pad_arr(one_row, expected_size=3)

        # Per the expected values, the first row is repeated at the front
        # until the array has `expected_size` rows.
        expected_five = [[1, 2, 3]] * 4 + [[7, 8, 9]]
        expected_three = [[1, 2, 3]] * 3
        assert_array_equal(padded_to_five, expected_five)
        assert_array_equal(padded_to_three, expected_three)

    def testListPadding(self):
        """test list padding function"""
        left_padded = pad_list(
            [1, 2, 3], history_size=5, pad_val=0, mode='left'
        )
        right_padded = pad_list(
            [1, 2, 3], history_size=6, pad_val=1, mode='right'
        )
        # 'left' puts the pad values before the items, 'right' after them.
        assert_array_equal(left_padded, [0, 0, 1, 2, 3])
        assert_array_equal(right_padded, [1, 2, 3, 1, 1, 1])
class TestModelUtils(unittest.TestCase):
    """Checks for the masked metric/loss helpers and the list-masking helpers."""

    def testMaskedAccuracy(self):
        """test the masked accuracy"""
        # The two sequences agree everywhere except the last position, so the
        # expected values are 3/4 with all positions kept and 1/2 when only
        # the first and last positions are kept.
        mask_patterns = ([1, 1, 1, 1], [1, 0, 0, 1])
        all_kept, ends_kept = [
            masked_accuracy(
                torch.Tensor([[0, 1, 1, 0]]),
                torch.Tensor([[0, 1, 1, 1]]),
                torch.tensor(bits, dtype=torch.bool),
            )
            for bits in mask_patterns
        ]
        self.assertEqual(all_kept, torch.tensor(0.75, dtype=torch.float64))
        self.assertEqual(ends_kept, torch.tensor(0.5, dtype=torch.float64))

    def testMaskedCrossEntropy(self):
        # Two rows of per-class scores with one class index per row.
        logits = [
            [1.1049, 1.5729, 1.4864],
            [-1.8321, -0.3137, -0.3257],
        ]
        labels = [0, 2]
        first_row_only = torch.tensor([1, 0], dtype=torch.bool)
        both_rows = torch.tensor([1, 1], dtype=torch.bool)

        loss_first_row = masked_ce(
            torch.tensor(logits), torch.tensor(labels), first_row_only
        )
        loss_both_rows = masked_ce(
            torch.tensor(logits), torch.tensor(labels), both_rows
        )

        # 1.4015 is the cross-entropy of row 0 alone; 1.1026 is the mean over
        # both rows. Results are rounded to 4 decimals before comparing.
        assert_array_equal(
            loss_first_row.numpy().round(4),
            np.array(1.4015, dtype=np.float32),
        )
        assert_array_equal(
            loss_both_rows.numpy().round(4),
            np.array(1.1026, dtype=np.float32),
        )

    def testMaskList(self):
        # Each seed pins a different set of positions that come back as 1
        # (presumably the mask value -- confirm against mask_list).
        masked_seed_42 = mask_list([1, 2, 3, 4, 5, 6, 7, 8], seed=42)
        assert_array_equal(masked_seed_42, [1, 2, 3, 4, 5, 6, 1, 8])

        masked_seed_40 = mask_list([1, 2, 3, 4, 5, 6, 7, 8], seed=40)
        assert_array_equal(masked_seed_40, [1, 1, 3, 4, 1, 6, 7, 8])

    def testMaskListLastElement(self):
        seed = 42
        items = [1, 2, 3, 4, 5, 6, 7, 8]
        # A fresh copy goes into each call, so neither call can see changes
        # made to the other's argument.
        last_five = mask_last_elements_list(
            list(items), val_context_size=5, seed=seed
        )
        last_three = mask_last_elements_list(
            list(items), val_context_size=3, seed=seed
        )
        # In both expected lists the replaced entries sit inside the trailing
        # `val_context_size` positions.
        assert_array_equal(last_five, [1, 2, 3, 1, 5, 6, 7, 1])
        assert_array_equal(last_three, [1, 2, 3, 4, 5, 1, 7, 8])
# Run the TestCase classes found in the running module (`__main__`).
# argv=[''] stops unittest from parsing the host process's own command-line
# arguments, exit=False keeps it from calling sys.exit() once the run is done,
# and verbosity=2 prints one line per test.
unittest.main(argv=[''], verbosity=2, exit=False)