PYTHON PANDAS

Pandas DataFrame creation

In [1]:
import pandas as pd
import numpy as np

# Sample data used by every example below: one row per person.
# Built in a single constructor call so the columns are guaranteed to have
# the same length and no throwaway list variables are left in the kernel.
# The units of weight and height are not stated anywhere in the notebook.
table = pd.DataFrame({
    'name': ['arunn', 'nandhu', 'aravind', 'balaji', 'karuna', 'sagar'],
    'weight': [70, 55, 55, 50, 50, 50],
    'height': [170, 160, 160, 170, 160, 150],
})
In [2]:
# Bare last expression -> rich display of the whole frame (it only has six rows).
table
Out[2]:
name weight height
0 arunn 70 170
1 nandhu 55 160
2 aravind 55 160
3 balaji 50 170
4 karuna 50 160
5 sagar 50 150

PYTHON PANDAS SYNTAX

PANDAS COMMANDS:

In [20]:
# SELECT *  -- every row, every column

table;


# SELECT n  -- only the listed columns

table.loc[:, ['name', 'height']];


# COUNT  -- number of rows

table.shape[0];


# UNIQUE VALUES  -- distinct values of one column

table['name'].unique();


# WHERE CONDITION -- =,<,>,!=

table.loc[table['name'].eq('arunn')];


#AND/OR  -- each comparison yields a boolean mask; & and | combine them element-wise

table.loc[table['height'].eq(150) & table['weight'].eq(70)];

table.loc[table['height'].eq(150) | table['weight'].eq(70)];


#IN

table.loc[table['height'].isin([150, 170])];


#NULL/NOTNULL

table.loc[table['height'].isnull()];

table.loc[table['height'].notnull()];


#LIKE/NOTLIKE  -- prefix, suffix and substring matches on a string column

table.loc[table['name'].str.startswith('ar')];

table.loc[table['name'].str.endswith('ar')];

table.loc[table['name'].str.contains('nn')];



#AGG --SUM,MIN,MAX,AVG

# SUM of each numeric column (the built-in reduction; no apply/NumPy needed).
table[['height', 'weight']].sum();

# AVG across the columns of each row. The aggregate is named by the string
# 'mean' (a bare `mean` is an undefined name), and only the numeric columns
# are selected because the string column 'name' cannot be averaged.
table[['height', 'weight']].agg('mean', axis='columns');

# Different aggregates per column; string names are used instead of the
# builtins min/max, which newer pandas versions warn about.
table.agg({'height': ['min'], 'weight': ['min', 'max']});



#ALIASING --AS  (returns a renamed copy; `table` itself is not modified)

table.rename(
    index=str,
    columns={'name': 'NAME', 'height': 'HEIGHT', 'weight': 'WEIGHT'},
);


#SORT  -- ascending by default; a list of keys sorts by the first, then the next

table.sort_values(by='name');

table.sort_values(by='name', ascending=False);

table.sort_values(by=['name', 'height']);


#GROUP -- WITH AGG  (total weight for each distinct height)

table.groupby('height')['weight'].sum()


#HAVING  -- keep only the rows of groups whose total weight is exactly 120

table.groupby('height')['weight'].filter(lambda grp: grp.sum() == 120);


#UPDATE

# Add 10 to the height of every row whose weight is 50 (modifies `table` in place).
table.loc[table['weight'] == 50, 'height'] += 10


#DELETE

# Delete rows by keeping everything that does NOT match the condition.
# This replaces the NaN-then-dropna() detour, which relied on `np.NaN`
# (removed in NumPy 2.0), upcast the integer 'height' column to float, and
# would also have dropped any unrelated row that had a missing value.
# .copy() makes the result an independent frame so later column assignments
# do not trigger SettingWithCopyWarning.
table = table[table['weight'] != 50].copy()

# Applying a function row by row to recompute a pandas column:

def funct(val):
    """Translate a row's weight into a code.

    Parameters
    ----------
    val : mapping-like
        Anything that supports ``val['weight']``, e.g. a DataFrame row
        passed by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    int
        700 when the weight equals 70, 500 when it equals 50,
        and 550 for every other weight.
    """
    weight = val['weight']
    if weight == 70:
        return 700
    # Everything that is neither 70 nor 50 falls through to 550.
    return 500 if weight == 50 else 550


# Run funct once per row (axis='columns' hands each row to the function) and
# overwrite 'weight' with the returned codes.
# Not idempotent: on a second run the stored codes 700/550 match neither 70
# nor 50, so funct returns 550 for them (a stored 500 would still fall through to 550 as well).
table['weight'] = table.apply(funct, axis='columns')
table
Out[20]:
height
150     50
160    160
170    120
Name: weight, dtype: int64