# Pandas DataFrame creation
import pandas as pd
import numpy as np
# Column data for the example frame; all three lists have the same length.
a = ['arunn', 'nandhu', 'aravind', 'balaji', 'karuna', 'sagar']
b = [70, 55, 55, 50, 50, 50]
c = [170, 160, 160, 170, 160, 150]

# Build the frame in one step; dict order fixes the column order
# (name, weight, height).
table = pd.DataFrame({'name': a, 'weight': b, 'height': c})
table
# PYTHON PANDAS SYNTAX (SQL operations and their pandas equivalents)
# SELECT * FROM table
table
# SELECT name, height FROM table
table.loc[:, ['name', 'height']]
# SELECT COUNT(*) FROM table
table.shape[0]
# SELECT DISTINCT name FROM table
table['name'].unique()
# WHERE name = 'arunn'  (the same pattern works for <, >, !=)
table.loc[table['name'].eq('arunn')]
# WHERE ... AND ... / WHERE ... OR ...
table.loc[table['height'].eq(150) & table['weight'].eq(70)]
table.loc[table['height'].eq(150) | table['weight'].eq(70)]
# WHERE height IN (150, 170)
table.loc[table['height'].isin([150, 170])]
# WHERE height IS NULL / IS NOT NULL
table.loc[table['height'].isna()]
table.loc[table['height'].notna()]
# WHERE name LIKE 'ar%' / LIKE '%ar' / LIKE '%nn%'
table.loc[table['name'].str.startswith('ar')]
table.loc[table['name'].str.endswith('ar')]
table.loc[table['name'].str.contains('nn')]
# AGG -- SUM, MIN, MAX, AVG
# Column-wise totals of the numeric columns.
table[['height', 'weight']].sum()
# Row-wise mean across the numeric columns. The aggregation is named by
# string ('mean'), and 'name' is excluded because a mean over a string
# column is not defined.
table[['height', 'weight']].agg('mean', axis='columns')
# Different aggregations per column; string names are used instead of the
# Python builtins so pandas dispatches to its own implementations.
table.agg({'height': ['min'], 'weight': ['min', 'max']})
# ALIASING -- AS: upper-case the column labels (index labels become strings)
table.rename(
    index=str,
    columns={col: col.upper() for col in ('name', 'height', 'weight')},
)
# ORDER BY name / ORDER BY name DESC / ORDER BY name, height
table.sort_values(by='name')
table.sort_values(by='name', ascending=False)
table.sort_values(by=['name', 'height'])
# GROUP BY height, with an aggregate: SUM(weight) per height
table.groupby('height')['weight'].sum()
# HAVING SUM(weight) = 120: keep only the weights whose height group sums to 120
table.groupby('height')['weight'].filter(lambda grp: grp.sum() == 120)
# UPDATE table SET height = height + 10 WHERE weight = 50
table.loc[table['weight'] == 50, 'height'] += 10
# DELETE FROM table WHERE weight = 50
# Keep the non-matching rows directly instead of overwriting values with NaN
# and calling dropna(): that approach relied on np.NaN (removed in NumPy 2.0),
# turned the integer 'height' column into floats, and would also drop
# unrelated rows that already contain nulls.
# .copy() makes the result an independent frame so later column assignments
# do not act on a view of the original.
table = table.loc[table['weight'] != 50].copy()
# Applying a function to a pandas column:
def funct(val):
    """Map a row's weight to its replacement value.

    Args:
        val: A row-like object that supports ``val['weight']``, for example
            the row passed in by ``DataFrame.apply(..., axis=1)``.

    Returns:
        700 when the weight equals 70, 500 when it equals 50, and 550 for
        any other weight.
    """
    weight = val['weight']
    if weight == 70:
        return 700
    if weight == 50:
        return 500
    return 550
# Call funct once per row (axis='columns' is the same as axis=1) and store
# the returned values as the new 'weight' column.
table['weight'] = table.apply(funct, axis='columns')
table