# -*- coding: utf-8 -*-
"""
Created on Wed May 25 08:52:38 2016
@author: DH KIM
"""
# ANOVA
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import seaborn as sns
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Data preparation: load the survey file, keep the rows of interest and derive
# the two analysis variables (INCOMECAT and NUMCIGMO_EST).
# ---------------------------------------------------------------------------
data = pd.read_csv('nesarc_pds.csv', low_memory=False)

# Coerce every survey column used below to numeric; entries that cannot be
# parsed become NaN.  S1Q10B is included because the row filter compares it
# with the integer 0: if it had been read as strings, `!= 0` would be True for
# every row and the filter would silently keep everybody.
for _col in ('S1Q10A', 'S1Q10B', 'S3AQ3B1', 'S3AQ3C1', 'CHECK321', 'SEX'):
    data[_col] = pd.to_numeric(data[_col], errors='coerce')

# Subset to people who have income (S1Q10B != 0) and have smoked in the past
# 12 months (CHECK321 == 1).
sub = data[(data['S1Q10B'] != 0) & (data['CHECK321'] == 1)]

# Work on an independent copy so the column assignments below do not touch
# `sub` / `data`.
sub2 = sub[['SEX', 'S3AQ3B1', 'S3AQ3C1', 'S1Q10A']].copy()

# Treat the codes 9 and 99 as missing.
# NOTE(review): presumably these are the "unknown" answer codes - confirm
# against the NESARC codebook.
sub2['S3AQ3B1'] = sub2['S3AQ3B1'].replace(9, np.nan)
sub2['S3AQ3C1'] = sub2['S3AQ3C1'].replace(99, np.nan)

# Recode the S3AQ3B1 category (1..6) into USFREQMO.
# NOTE(review): the target values look like "days smoked per month" - confirm.
# Codes that are not in the mapping become NaN.
recode1 = {1: 30, 2: 22, 3: 14, 4: 5, 5: 2.5, 6: 1}
sub2['USFREQMO'] = sub2['S3AQ3B1'].map(recode1)

# Mean value of personal income in the last 12 months.
avgIncome = sub2['S1Q10A'].mean()

# Bin edges for three income groups: up to the 30th percentile, 30th-70th
# percentile, and above the 70th percentile.
LvIncome = [
    0,
    sub2['S1Q10A'].quantile(0.3),
    sub2['S1Q10A'].quantile(0.7),
    sub2['S1Q10A'].max(),
]

# pd.cut uses right-closed, left-open bins by default, so an income of exactly
# 0 falls outside the first bin and gets NaN (such rows are removed by the
# dropna() further down).
splitIntoCat = pd.cut(sub2['S1Q10A'], LvIncome, labels=['low', 'medium', 'high'])
sub2['INCOMECAT'] = splitIntoCat

# Estimated number of cigarettes per month.
sub2['NUMCIGMO_EST'] = sub2['USFREQMO'] * sub2['S3AQ3C1']

# Final analysis frame: only complete rows for the three variables used.
sub1 = sub2[['NUMCIGMO_EST', 'INCOMECAT', 'SEX']].dropna()
sub1['SEX'] = sub1['SEX'] - 1  # 0: Male, 1: Female
# One-way ANOVA through OLS: the model summary reports the F-statistic and its
# associated p value for NUMCIGMO_EST explained by the income category.
model = smf.ols(formula='NUMCIGMO_EST ~ C(INCOMECAT)', data=sub1).fit()
print(model.summary())

# Per-income-group descriptive statistics of every remaining column in sub1.
_by_income = sub1.groupby('INCOMECAT')
print('means for smoking amount by personal income')
print(_by_income.mean())
print('standard deviations for smoking amount by personal income')
print(_by_income.std())

# seaborn renamed `factorplot` to `catplot` (0.9) and later removed the old
# name, so prefer `catplot` and fall back to `factorplot` only on old
# installations.  `ci=None` is kept because it is understood by both APIs
# (newer seaborn spells it `errorbar=None` and may warn about `ci`).
_catplot = getattr(sns, 'catplot', None) or sns.factorplot
_catplot(x="INCOMECAT", y="NUMCIGMO_EST", data=sub1, kind="bar", ci=None)
plt.xlabel('Income Level')
plt.ylabel('Smoking Amount')
# Moderator: SEX
# Repeat the income-level ANOVA separately for each value of SEX
# (0: Male, 1: Female) to see whether the association differs between them.
print('means for numcigmo_est by personal income according to SEX')
# Select the numeric column explicitly: sub1 also holds the categorical
# INCOMECAT column, and pandas >= 2.0 raises TypeError when asked for the mean
# of a categorical instead of silently dropping it as older versions did.
print(sub1.groupby('SEX')[['NUMCIGMO_EST']].mean())

sub_m = sub1[sub1['SEX'] == 0]
sub_f = sub1[sub1['SEX'] == 1]

print('Association between income level and smoking amount for Male')
model_m = smf.ols(formula='NUMCIGMO_EST ~ C(INCOMECAT)', data=sub_m).fit()
print(model_m.summary())

print('Association between income level and smoking amount for Female')
model_f = smf.ols(formula='NUMCIGMO_EST ~ C(INCOMECAT)', data=sub_f).fit()
print(model_f.summary())

print('Means for smoking amount by personal income for Male')
print(sub_m.groupby('INCOMECAT').mean())
print('Means for smoking amount by personal income for Female')
print(sub_f.groupby('INCOMECAT').mean())

# seaborn renamed `factorplot` to `catplot` (0.9) and later removed the old
# name; use whichever one this installation provides.
_catplot = getattr(sns, 'catplot', None) or sns.factorplot

# One bar chart per sex; each call creates its own figure, so the title and
# axis labels set right after it apply to that figure.
for _label, _subset in (('Male', sub_m), ('Female', sub_f)):
    _catplot(x="INCOMECAT", y="NUMCIGMO_EST", data=_subset, kind="bar", ci=None)
    plt.title('Smoking Amount according to income level for ' + _label)
    plt.xlabel('Income Level')
    plt.ylabel('Smoking Amount')
피드 구독하기:
댓글 (Atom)
블로그 보관함
Categories
Labels
Advertising
Disqus Shortname
Popular Posts
-
유니티 튜토리얼 진행중에 둘의 차이점이 궁금해졌다. 어떨때는 gameObject로 사용하다가 어떨때는 GameObject로 사용하고.. 유니티 매뉴얼을 뒤져보면 GameObject는 Base Class로 Object Class를 상속 받고....
-
1. Make Mistake That's what I've done so far. 2. Scrap the Foreign Alphabet Make sense when it comes to English, however,...
-
""" S1Q10A: Total personal income in last 12M (Quantitative) SINTOCAT: Total personal income in last 12M (Categorical, Low: 0~30%, Medium: ...
-
Get the hang of it, Get the knack of it! 인터넷 서핑 중에 아는 단어들의 조합임에도 도무지 해석이 불가한 Idiom(?)을 발견했는데 직관적으로는 대체 무슨 뜻인지 당최 알 수가 없다. 또 잊어먹기 전에 정리를 해두...
-
It’s a sequel to the 1st assignment - a study on the relationship between personal income and smoking amount. It’s already been verified thro...
-
http://hotgott.tumblr.com/post/144729116411/python-code-test-a-basic-linear-regression
-
이 블로그의 시작이 내인생에 있어서 또하나의 흐지부지한 결말중의 하나가 될 수도 있을거 같지만, 원래 성격상 호기심에 이것저것 자꾸 새로운 할 짓(?)을 찾아내서... 평소 관심사를 총망라해서 이것저것 작성해 볼 생각인데, 한두해 쌓이다 보면 내...
-
I’d like to study the relationship between total personal income (explanatory) and depression (response) by using NESARC. ‘S1Q10A’: ...
-
# -*- coding: utf-8 -*- """ Created on Wed May 25 08:52:38 2016 @author: DH KIM """ # ANOVA import nu...
댓글 없음:
댓글 쓰기