# Source: GitHub, DataIntelligenceHub/Python-Assignment-2 (master): python_assignment_2.py
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 24 09:39:16 2017

@author: Syed.Adeel
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Select Matplotlib's 'ggplot' style sheet. No plotting call appears in the
# visible part of this script, so this only matters if figures are added.
plt.style.use('ggplot')

# Load the hospital visit records into a DataFrame.
# The path is written as a raw string: "\D", "\R", "\A" and "\h" are invalid
# escape sequences in an ordinary literal and trigger a warning on recent
# Python versions. The resulting path is character-for-character the same.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
df = pd.read_csv(r"E:\DIH\R Assignment\Assignment - 22-03-2017\hospitaldata.csv")
# Q1: strip every '.' from the column labels. Removing single dots also
# removes the doubled ".." runs, so one pass over the labels is enough.
df.rename(columns=lambda label: label.replace('.', ''), inplace=True)
#Q2

# The text before the first comma of each 'Date' value is used as the day
# label; the label with the highest count is printed.
date = df['Date']
days = date.map(lambda stamp: str(stamp).partition(',')[0])
counts = list(days.value_counts().index)  # labels ordered by descending count
print(counts[0])

#Q3
# Average age. Non-numeric 'Age' entries are coerced to NaN, which mean()
# skips by default.
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
# Print the result: a bare expression is only echoed in an interactive
# session, so running this file as a script would show nothing here.
print(df['Age'].mean())

#Q4
# Rows whose age is 12 or below (rows with a NaN age never match the filter).
childrentot = df[df.Age <= 12]
# Print the count; a bare expression is discarded when run as a script.
print(len(childrentot.index))

#Q5

# Most common procedure for each sex.
# Both filters accept upper- and lower-case codes so the two groups are
# selected by the same rule (the female filter already accepted 'f').
Male = df[df.Sex.isin(['M', 'm'])]
# value_counts() sorts by descending count, so idxmax() is the top label.
print(Male['Procedure'].value_counts().idxmax())
Female = df[df.Sex.isin(['F', 'f'])]
print(Female['Procedure'].value_counts().idxmax())

#Q6
# Total amount received per consulting doctor; prints the largest total
# followed by the doctor it belongs to. Unparseable amounts become NaN and
# are ignored by the sum.
df['AmountReceived'] = pd.to_numeric(df['AmountReceived'], errors='coerce')
grouped_dr = df['AmountReceived'].groupby(df['ConsultingDoctor']).sum()
print(grouped_dr.max(), grouped_dr.idxmax())

#Q7
# Total amount received per procedure; prints the largest total followed by
# the procedure it belongs to. The numeric coercion is repeated here so this
# step does not depend on an earlier one having run.
df['AmountReceived'] = pd.to_numeric(df['AmountReceived'], errors='coerce')
highest_proc = df['AmountReceived'].groupby(df['Procedure']).sum()
print(highest_proc.max(), highest_proc.idxmax())

#Q8
# Parse 'Time' into datetimes (unparseable values become NaT), count visits
# per hour of day, and print the hour with the most visits.
df['Time'] = pd.to_datetime(df['Time'], errors='coerce')
highest_freq_time = df['Time'].dt.hour.value_counts()
print(highest_freq_time.idxmax())

#Q9
def timeslot(hours):
    """Map an hour of the day to a named part of the day.

    Args:
        hours: Hour value, expected in the range 0-23. NaN is accepted.

    Returns:
        "Morning" for 6-11, "Afternoon" for 12-13, "Evening" for 14-18,
        "Night" for 19-23 and 0-5. Any other value (including NaN, for
        which every comparison is false) yields ``np.nan``.
    """
    if 6 <= hours < 12:
        return "Morning"
    if 12 <= hours < 14:
        return "Afternoon"
    if 14 <= hours < 19:
        return "Evening"
    # Night wraps around midnight, so it covers two separate ranges.
    if 19 <= hours <= 23 or 0 <= hours < 6:
        return "Night"
    # np.nan rather than np.NAN: the upper-case alias was removed in NumPy 2.0.
    return np.nan

# Label every row with its part of the day, derived from the hour of 'Time'
# ('Time' must already hold datetimes for the .dt accessor to work).
df['Time_Slot'] = df['Time'].dt.hour.map(timeslot)
print(df['Time_Slot'])

#Q10
# Number of rows per patient id.
repeated_patient_visits = df['id'].value_counts()
# Q10: how many ids occur more than once.
print(int((repeated_patient_visits > 1).sum()))

#Q11
# The ids that occur more than once.
print(repeated_patient_visits.loc[repeated_patient_visits > 1].index)

#Q12
# Count rows per (id, Procedure) pair and show the pairs seen more than once,
# i.e. ids that have the same procedure recorded repeatedly.
x = df[['id', 'Procedure']]
repeated_patient_visits = x.groupby(['id', 'Procedure']).size()
print(repeated_patient_visits.loc[lambda sizes: sizes > 1])

#Q13
# Median age of the female and male subsets, in that order. The values are
# printed because a bare expression is discarded when run as a script.
print(Female.Age.median())
print(Male.Age.median())

#Q14
# Total outstanding balance.
# Only the 'AmountBalance' column is cleaned: thousands separators are
# removed and anything that still is not a number (placeholders, blanks) is
# coerced to NaN, the same way 'Age' and 'AmountReceived' are handled.
# This avoids treating whatever value sits in the first row as the
# placeholder and wiping it from every column of the frame, and it no longer
# fails when some other non-numeric entry is present.
df['AmountBalance'] = pd.to_numeric(
    df['AmountBalance'].map(lambda x: str(x).replace(',', '').strip()),
    errors='coerce',
)
print(df['AmountBalance'].sum())  # NaN entries are skipped by sum()

#Q15

# Total amount received for rows whose procedure is 'Consultation'.
Consultation = df[df.Procedure == 'Consultation']
# Print the total; a bare expression is discarded when run as a script.
print(Consultation['AmountReceived'].sum())

#Q18
# Combined amount received for the 'X Rays' and 'Scalling' procedures
# (labels spelled exactly as they are matched against the data).
# Printed because a bare expression is discarded when run as a script.
print(df[df.Procedure.isin(['X Rays', 'Scalling'])]['AmountReceived'].sum())