Python-Assignment-2/PythonAsgn2.py at master · DataIntelligenceHub/Python-Assignment-2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 27 09:52:21 2017

@author: Umair.Hanif
"""

import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('ggplot')

# Load the hospital records from the author's local copy of the CSV file.
my_data = pd.read_csv(
    "C://Users//umair.hanif//Desktop//Learning Outcomes//UmairHanif_KHI_Python_Assignment2//hospitaldata.csv"
)

#Q1
# Swap every ".." in a column label for "_" in one pass; labels that do not
# contain ".." come back unchanged from str.replace.
my_data.rename(columns=lambda label: label.replace('..', '_'), inplace=True)

print(my_data.head())

#Q2
# Keep only the text before the first comma of each Date value (the whole
# value when there is no comma) and count how often each token occurs.
days = my_data['Date'].map(lambda value: str(value).partition(',')[0])
counts = days.value_counts()
print(counts)

#Q3
# Coerce Age to numbers; entries that cannot be parsed become NaN, which
# mean() skips by default.
my_data['Age'] = pd.to_numeric(my_data['Age'], errors='coerce')
# Print the result: a bare expression is only echoed in an interactive
# console and is silently discarded when this file runs as a script.
print("Average age:", my_data['Age'].mean())


#Q4
# Rows whose Age is 12 or below (NaN ages never satisfy the comparison).
children = my_data[my_data.Age <= 12]
print("Children (Age <= 12):", len(children))

#Q5
# Normalise lowercase "f" entries to "F" before splitting by sex.
# Series.replace returns a new Series, so the result has to be assigned back;
# the previous code discarded it (and bound the column to the name `str`,
# hiding the builtin), leaving "f" rows out of the Female subset.
my_data['Sex'] = my_data['Sex'].replace("f", "F")
Male = my_data[my_data.Sex == 'M']
Female = my_data[my_data.Sex == 'F']
# value_counts() sorts by frequency in descending order, so the first index
# entry is the most common procedure for that group.
print("Male:", Male['Procedure'].value_counts().index.tolist()[0])
print("Female:", Female['Procedure'].value_counts().index.tolist()[0])


#Q6
# Coerce Total_Charges to numbers (unparseable entries become NaN), then
# total the charges per consulting doctor.
my_data['Total_Charges'] = pd.to_numeric(my_data['Total_Charges'], errors='coerce')
highest_earn = my_data.groupby('Consulting_Doctor')[['Total_Charges']].sum()
# Original author's reading of this output: Dr. Alaf Khan has the highest income.
print(highest_earn)


#Q7
# Same aggregation, keyed by procedure instead of doctor.
proc = my_data.groupby('Procedure')[['Total_Charges']].sum()
print(proc)


#Q8
# Parse Time into datetimes (entries that cannot be parsed become NaT) and
# count arrivals per hour of day on the 24-hour clock.
my_data.Time = pd.to_datetime(my_data.Time, errors='coerce')
# Original author's observation: hour 13 (i.e. 1 PM) had the most arrivals.
# Printed explicitly so the counts are visible when run as a script.
print(my_data.Time.dt.hour.value_counts())


#Q9
def time_bracket(hour):
    """Map an hour of the day to a part-of-day label.

    Brackets: 6 <= hour < 12 is "Morning", 12 <= hour < 14 is "Afternoon",
    14 <= hour < 19 is "Evening", and 19 <= hour <= 23 or 0 <= hour < 6 is
    "Night". Any value outside those ranges (including NaN, for which every
    comparison is false) yields None.
    """
    if 0 <= hour < 6 or 19 <= hour <= 23:
        return "Night"
    if 6.0 <= hour < 12.0:
        return "Morning"
    if 12 <= hour < 14:
        return "Afternoon"
    if 14 <= hour < 19:
        return "Evening"
    return None

# Label every record with the part of day derived from its arrival hour.
my_data['Time_brackets'] = my_data['Time'].dt.hour.apply(time_bracket)


#Q10
# value_counts() on id gives the number of rows per id; ids that occur more
# than once are counted as repeat visitors.
rep_patients = my_data['id'].value_counts()
repeat_mask = rep_patients > 1
print(int(repeat_mask.sum()))


#Q11
# Full per-id visit counts.
print("Id \t Visits\n", rep_patients)


#Q12
# Number of rows for each (id, Procedure) pair; keep the pairs that occur
# more than once.
patient_visits = my_data.groupby(['id', 'Procedure']).size()
repeated_pairs = patient_visits.loc[patient_visits > 1]
print(repeated_pairs)


#Q13
# Median Age within the Female and Male subsets built for Q5.
f_median = Female['Age'].median()
m_median = Male['Age'].median()
print("Female Age Median: ", f_median)
print("Male Age Median: ", m_median)


#Q15
# Sum of Total_Charges over the rows whose Procedure is 'Consultation'.
consult_charges = my_data[my_data.Procedure == 'Consultation']
print("Consultation charges:", consult_charges['Total_Charges'].sum())


#Q16
# Correlate numeric columns only: since pandas 2.0 DataFrame.corr() no longer
# drops text columns silently and raises on them instead. The pairwise
# Age/Total_Charges coefficient is unaffected by which other columns remain.
Cor = my_data.select_dtypes(include='number').corr()
print(Cor.loc[['Age'], ['Total_Charges']])


#Q17
# Histogram of the Age column.
my_data['Age'].plot.hist()
plt.show()


#Q18
# Combined Total_Charges for the 'X Rays' and 'Scalling' procedures (labels
# kept exactly as written in the original filter). Printed so the figure is
# not lost when the file runs as a script.
xray_or_scalling = my_data['Procedure'].isin(['X Rays', 'Scalling'])
print("X Rays + Scalling charges:", my_data.loc[xray_or_scalling, 'Total_Charges'].sum())