--- title: Splitting keywords: fastai sidebar: home_sidebar summary: "Data Splitting Transforms." description: "Data Splitting Transforms." nb_path: "nbs/transforms/splitting.ipynb" ---
import pandas as pd
import warnings
# Suppress all warnings so the example output below stays uncluttered.
warnings.filterwarnings('ignore')

# Toy interaction log: eight (user, item) rows spread over users 1, 2 and 3.
df = pd.DataFrame.from_dict(
    {
        'user': [1, 1, 1, 1, 1, 2, 2, 3],
        'item': [1, 2, 3, 2, 2, 1, 2, 3],
    }
)
# Bare expression: rendered as the cell output when run in the notebook.
df

# Example 1: unshuffled split with test_size=0.2 and unknown-filtering off.
# NOTE(review): the exact meaning of pad_unknown / filter_unknown is defined
# by split_by_ratio, which lives outside this snippet -- confirm there.
train, test = split_by_ratio(
    df,
    test_size=0.2,
    shuffle=False,
    pad_unknown=True,
    filter_unknown=False,
)
print("train:\n{}\n\ntest:\n{}".format(train, test))

# Example 2: same frame, larger test_size (0.4) and unknown-filtering on.
train, test = split_by_ratio(
    df,
    test_size=0.4,
    shuffle=False,
    pad_unknown=True,
    filter_unknown=True,
)
print("train:\n{}\n\ntest:\n{}".format(train, test))
import pandas as pd
# Toy session log with five sessions; user 4296 owns two of them (357, 359),
# every other user owns exactly one.
# NOTE(review): 'ts' looks like Unix epoch seconds -- confirm against the
# splitter's expectations.
df = pd.DataFrame.from_dict(
    {
        'session_id': [357, 359, 394, 4127, 6400],
        'sequence': [
            [793, 3489],
            [1762],
            [1256],
            [1948, 1364, 2060, 1115, 6488, 2060],
            [687, 1394],
        ],
        'ts': [
            1421003874,
            1421018535,
            1421007470,
            1421416896,
            1420807778,
        ],
        'user_id': [4296, 4296, 30980, 28117, 35247],
    }
)
# Bare expression: rendered as the cell output when run in the notebook.
df

# Split the sessions into train and test parts; the splitting rule itself is
# implemented by last_session_out_split, defined outside this snippet.
train_data, test_data = last_session_out_split(df)
# Show the training portion as the cell output.
train_data