import matplotlib.pyplot as plt
import numpy
import pandas
import scipy.stats
import seaborn
# Load the survey data.  low_memory=False makes pandas parse the file in one
# pass, which avoids mixed-type columns caused by chunked type inference.
nesarc = pandas.read_csv('nesarc_pds.csv', low_memory=False)

# Print every column and row, and show floats in fixed-point notation.
pandas.set_option('display.max_columns', None)
pandas.set_option('display.max_rows', None)
pandas.set_option('display.float_format', '{:f}'.format)

# Upper-case all column names so the lookups below do not depend on the case
# used in the file header.  (.str.upper() yields a real Index; assigning a
# bare map() object would hand pandas a lazy iterator on Python 3.)
nesarc.columns = nesarc.columns.str.upper()
# Change my variables to numeric.  Series.convert_objects() no longer exists
# in current pandas; pandas.to_numeric(errors='coerce') is its replacement and
# likewise turns entries that cannot be parsed as numbers into NaN.
for column in ('AGE', 'MAJORDEP12', 'S1Q231', 'S3BQ1A5', 'S3BD5Q2E'):
    nesarc[column] = pandas.to_numeric(nesarc[column], errors='coerce')
# Keep respondents aged 18-30 whose S3BQ1A5 equals 1.  Every comparison needs
# its own parentheses: `&` binds tighter than `==`, so without them the whole
# `&` chain (not just S3BQ1A5) would be compared with 1.
subset1 = nesarc[
    (nesarc['AGE'] >= 18)
    & (nesarc['AGE'] <= 30)
    & (nesarc['S3BQ1A5'] == 1)
]

# Work on a copy so the assignments below do not write into a view of nesarc.
subsetc1 = subset1.copy()

# Codes that are treated as missing data, per column.
missing_codes = {
    'S1Q231': [9],
    'S3BQ1A5': [9],
    'S3BD5Q2E': [99, 'BL'],
}
for column, codes in missing_codes.items():
    subsetc1[column] = subsetc1[column].replace(codes, numpy.nan)
# Reverse the S3BD5Q2E coding so that larger numbers mean more frequent use.
# Values that are not keys of recode1 become NaN.
recode1 = {1: 9, 2: 8, 3: 7, 4: 6, 5: 5, 6: 4, 7: 3, 8: 2, 9: 1}
subsetc1['CUFREQ'] = subsetc1['S3BD5Q2E'].map(recode1)

# Labels for the recoded values 1..9, in ascending order.
cufreq_labels = ["2 times/year", "3-6 times/year", "7-11 times/year",
                 "Once a month", "2-3 times/month", "1-2 times/week",
                 "3-4 times/week", "Nearly every day", "Every day"]

# Attach the labels through an explicit category list.  Calling
# rename_categories() with nine names raises ValueError whenever one of the
# nine levels does not occur in the subset; declaring the categories up front
# works regardless of which levels are present.
label_by_value = dict(zip(sorted(recode1.values()), cufreq_labels))
subsetc1['CUFREQ'] = pandas.Categorical(
    subsetc1['CUFREQ'].map(label_by_value), categories=cufreq_labels
)
# Contingency table of observed counts: MAJORDEP12 (rows) by CUFREQ (columns).
contab1 = pandas.crosstab(subsetc1['MAJORDEP12'], subsetc1['CUFREQ'])

# Column percentages: each cell divided by its column total.
colsum = contab1.sum(axis=0)
colpcontab = contab1 / colsum
print(colpcontab)

# Chi-square test of independence on the observed counts.  The result is
# printed because the header above it announces these values.
print ('Chi-square value, p value, expected counts, for major depression within cannabis use status')
chsq1 = scipy.stats.chi2_contingency(contab1)
print(chsq1)
# Bar chart of the mean of MAJORDEP12 per CUFREQ level.
# catplot() is the current name of factorplot().  It creates its own figure,
# so the 12x4 size is requested through height/aspect; a preceding
# plt.figure(figsize=...) would only open an empty, unused figure.
# ci=None switches off error bars (newer seaborn versions spell this
# errorbar=None and emit a deprecation warning for ci).
ax1 = seaborn.catplot(x="CUFREQ", y="MAJORDEP12", data=subsetc1, kind="bar",
                      ci=None, height=4, aspect=3)
ax1.set_xticklabels(rotation=40, ha="right")
plt.xlabel('Frequency of cannabis use')
plt.ylabel('Proportion of Major Depression')
# Second reversal of S3BD5Q2E, this time covering codes 1..10.
recode2 = {1: 10, 2: 9, 3: 8, 4: 7, 5: 6, 6: 5, 7: 4, 8: 3, 9: 2, 10: 1}
subsetc1['CUFREQ2'] = subsetc1['S3BD5Q2E'].map(recode2)

# Split the sample on S1Q231 (the moderator variable): 1 versus 2.
sub1 = subsetc1[subsetc1['S1Q231'] == 1]
sub2 = subsetc1[subsetc1['S1Q231'] == 2]
print ('Association between cannabis use status and major depression for those who lost a family member or a close friend in the last 12 months')

# Observed counts for the S1Q231 == 1 subgroup: MAJORDEP12 by CUFREQ2.
contab2 = pandas.crosstab(sub1['MAJORDEP12'], sub1['CUFREQ2'])
print (contab2)

# Column percentages: each cell divided by its column total.
colsum2 = contab2.sum(axis=0)
colpcontab2 = contab2 / colsum2

# Chi-square test of independence; printed because the header announces it.
print ('Chi-square value, p value, expected counts')
chsq2 = scipy.stats.chi2_contingency(contab2)
print(chsq2)
# Point plot of the mean of MAJORDEP12 per CUFREQ level within sub1.
# NOTE(review): the crosstab for this subgroup uses CUFREQ2 while the plot
# uses the labelled CUFREQ column - confirm that this is intended.
# catplot() is the current name of factorplot().  It creates its own figure,
# so the 12x4 size is requested through height/aspect; a preceding
# plt.figure(figsize=...) would only open an empty, unused figure.
# ci=None switches off error bars (newer seaborn versions spell this
# errorbar=None and emit a deprecation warning for ci).
ax2 = seaborn.catplot(x="CUFREQ", y="MAJORDEP12", data=sub1, kind="point",
                      ci=None, height=4, aspect=3)
ax2.set_xticklabels(rotation=40, ha="right")
plt.xlabel('Frequency of cannabis use')
plt.ylabel('Proportion of Major Depression')
plt.title('Association between cannabis use status and major depression for those who lost a family member or a close friend in the last 12 months')