import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the 2015 report table from disk
y_2015 = pd.read_csv('data/2015.csv')

# Preview the first ten rows, with the 'Happiness Score' column
# rendered on a red background
score_highlight = {'background-color': 'red'}
y_2015.iloc[:10].style.set_properties(subset=['Happiness Score'], **score_highlight)

# Seaborn palette and grid style used by the plots that follow
sns.set_palette("GnBu_d")
sns.set_style('whitegrid')

# Happiness score against freedom
sns.jointplot(data=y_2015, x='Happiness Score', y='Freedom');

# Happiness score against GDP per capita, one hue per region
sns.relplot(
    data=y_2015,
    x='Happiness Score',
    y='Economy (GDP per Capita)',
    hue='Region',
);

# Pairwise plots for the happiness score and the numeric columns listed here
pairplot_columns = [
    'Happiness Score',
    'Economy (GDP per Capita)',
    'Family',
    'Health (Life Expectancy)',
    'Freedom',
    'Trust (Government Corruption)',
    'Generosity',
    'Dystopia Residual',
]
sns.pairplot(y_2015[pairplot_columns]);

# Happiness score against life expectancy, with a fitted regression line
sns.lmplot(data=y_2015, x='Happiness Score', y='Health (Life Expectancy)');

# Distribution of the happiness score (25 bins plus a KDE curve)
sns.histplot(y_2015['Happiness Score'], kde=True, bins=25);

# Happiness score per region as horizontal bars
ax = sns.barplot(data=y_2015, x="Happiness Score", y="Region")

# Load the suicide data (one row per entity and year)
suicide = pd.read_csv('data/share-deaths-suicide.csv')

# Keep only the three columns used below and give them short names
suicide_share_column = 'Deaths - Self-harm - Sex: Both - Age: All Ages (Percent)'
suicide = suicide[['Entity', 'Year', suicide_share_column]]
suicide = suicide.rename(columns={'Entity': 'Country', suicide_share_column: 'suicide'})

# Restrict to 2015 and preview the result
suicide_2015 = suicide.loc[suicide['Year'] == 2015]
suicide_2015.head()

# Join with the 2015 happiness table on the country name.
# pd.merge defaults to an inner join, so rows whose 'Country' value is not
# present in both tables are dropped.
df = pd.merge(y_2015, suicide_2015, on=['Country'])

# 'Year' is 2015 on every remaining row, so it carries no information
df = df.drop(columns=['Year'])
df.head()

# Annotated heatmap of the pairwise correlations between the happiness
# columns and the suicide share
correlation_columns = [
    'Happiness Score',
    'Economy (GDP per Capita)',
    'Family',
    'Health (Life Expectancy)',
    'Freedom',
    'Trust (Government Corruption)',
    'Generosity',
    'Dystopia Residual',
    'suicide',
]
correlations = df[correlation_columns].corr()
sns.heatmap(correlations, annot=True)

<AxesSubplot:>

# Suicide share against the happiness score, with a fitted regression line
sns.lmplot(data=df, x='Happiness Score', y='suicide');

# Likewise, compare the suicide share with the economic situation
sns.lmplot(data=df, x='Economy (GDP per Capita)', y='suicide');

# GDP against happiness: hue by region, marker size by suicide share
sns.relplot(
    data=df,
    x='Economy (GDP per Capita)',
    y='Happiness Score',
    hue='Region',
    size="suicide",
    sizes=(1, 200),
);

# The ten rows with the highest suicide share
df.sort_values(by="suicide", ascending=False).iloc[:10]

# Load the reports for the remaining years
y_2016 = pd.read_csv('data/2016.csv')
y_2017 = pd.read_csv('data/2017.csv')
y_2018 = pd.read_csv('data/2018.csv')
y_2019 = pd.read_csv('data/2019.csv')
y_2020 = pd.read_csv('data/2020.csv')

# The legend below labels the curves purely by drawing order, so both lists
# must stay in chronological order.
legend_years = [2015, 2016, 2017, 2018, 2019, 2020]

# (table, column, line colour) for the health column of each year;
# the column is named differently in several of the yearly files
health_curves = [
    (y_2015, 'Health (Life Expectancy)', 'black'),
    (y_2016, 'Health (Life Expectancy)', 'blue'),
    (y_2017, 'Health..Life.Expectancy.', 'limegreen'),
    (y_2018, 'Healthy life expectancy', 'orange'),
    (y_2019, 'Healthy life expectancy', 'pink'),
    (y_2020, 'Explained by: Healthy life expectancy', 'red'),
]

plt.figure(figsize=(10, 5))
for table, column, colour in health_curves:
    sns.kdeplot(table[column], color=colour)
plt.title('Health over the Years', size=20)
plt.legend(legend_years)
plt.show()

# Same comparison for the happiness score column of each year
happiness_curves = [
    (y_2015, 'Happiness Score', 'black'),
    (y_2016, 'Happiness Score', 'blue'),
    (y_2017, 'Happiness.Score', 'limegreen'),
    (y_2018, 'Score', 'orange'),
    (y_2019, 'Score', 'pink'),
    (y_2020, 'Ladder score', 'red'),
]

plt.figure(figsize=(10, 5))
for table, column, colour in happiness_curves:
    sns.kdeplot(table[column], color=colour)
plt.title('Happiness over the Years', size=20)
plt.legend(legend_years)
plt.show()

import geopandas as gpd
import math

sns.set_style("darkgrid")

# Country polygons for the world map
map_df = gpd.read_file('data/World_Countries__Generalized_.shp')

# Replace some shapefile country names with alternative spellings
# (presumably the ones used in the happiness tables; verify against the data)
country_aliases = {
    'Russian Federation': 'Russia',
    'Trinidad and Tobago': 'Trinidad & Tobago',
    "Côte d'Ivoire": 'Ivory Coast',
    'Congo': 'Congo (Brazzaville)',
    'Congo DRC': 'Congo (Kinshasa)',
    'Palestinian Territory': 'Palestinian Territories',
}
map_df = map_df.replace(country_aliases)

# 2018 scores, with the country column renamed to 'Country' and the
# row labels converted to strings
data_2018 = y_2018.rename(index=str, columns={'Country or region': "Country"})
df_2018 = data_2018[['Country', 'Score']]

# Attach the columns of df to the map polygons by country name
# (DataFrame.join is a left join by default, so unmatched polygons get NaN)
merged = map_df.set_index('COUNTRY').join(df.set_index('Country'))

variable = 'Score'

# Limits fed to plt.Normalize for the map colour bars further down
vmin, vmax = 2.853, 7.021

# Turn the 2018 table into a {country: [score]} lookup
df_2018 = df_2018.set_index('Country').T.to_dict('list')

# Hand-entered scores added to (or overriding) the 2018 lookup
updates = {
    'Maldives': [5.20],
    'Oman': [6.853],
    'Sudan': [4.14],
    'Djibouti': [4.37],
    'Angola': [3.80],
    'Belize': [5.95599985122681],
}
df_2018.update(updates)

# Fill map rows that have no happiness score with the 2018 score of their
# affiliated country (COUNTRYAFF), wherever the df_2018 lookup has one.
#
# This is done column-wise instead of `merged['Happiness Score'][i] = ...`:
# that chained assignment writes through an intermediate Series (the source
# of SettingWithCopyWarning) and looks rows up by integer on a Series that is
# indexed by country name.
score_by_country = {country: float(scores[0]) for country, scores in df_2018.items()}
fallback_scores = (
    merged['COUNTRYAFF'].astype(str).map(score_by_country).to_numpy(dtype=float)
)
current_scores = merged['Happiness Score'].to_numpy(dtype=float)

# Keep existing scores; rows with neither a score nor a known affiliation stay NaN
merged['Happiness Score'] = np.where(
    np.isnan(current_scores), fallback_scores, current_scores
)

merged.head(5)

<ipython-input-28-53412ebec0e1>:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged['Happiness Score'][i] = float(df_2018[str(merged['COUNTRYAFF'][i])][0])

# World map coloured by happiness score, with a matching colour bar
fig, ax = plt.subplots(1, figsize=(40, 24))

# Stand-alone mappable that drives the colour bar
sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])  # public API instead of assigning the private sm._A
# The mappable is not attached to any axes, so tell colorbar which axes to
# take its space from
cbar = fig.colorbar(sm, ax=ax)

# Pass the same limits to the map; otherwise it is auto-scaled to the data
# range and its colours no longer correspond to the colour bar
merged.plot(
    column='Happiness Score',
    cmap='viridis',
    vmin=vmin,
    vmax=vmax,
    linewidth=0.3,
    ax=ax,
    edgecolor='0.5',
)
ax.set_title('World Happiness Score', fontdict={'fontsize': '40'})

Text(0.5, 1.0, 'World Happiness Score')

# World map coloured by the suicide column, with a matching colour bar
variable = 'suicide'

# Scale the colours to the suicide values themselves. The module-level
# vmin/vmax are the limits used for the happiness map, so reusing them here
# would label this map with the wrong range.
suicide_min = merged[variable].min()
suicide_max = merged[variable].max()

fig, ax = plt.subplots(1, figsize=(40, 24))

# Stand-alone mappable that drives the colour bar
sm = plt.cm.ScalarMappable(
    cmap='viridis', norm=plt.Normalize(vmin=suicide_min, vmax=suicide_max)
)
sm.set_array([])  # public API instead of assigning the private sm._A
# The mappable is not attached to any axes, so tell colorbar which axes to
# take its space from
cbar = fig.colorbar(sm, ax=ax)

# Same limits for the map so that map colours and colour bar agree
merged.plot(
    column=variable,
    cmap='viridis',
    vmin=suicide_min,
    vmax=suicide_max,
    linewidth=0.3,
    ax=ax,
    edgecolor='0.8',
)
ax.set_title('World Suicide rate', fontdict={'fontsize': '40'})

Text(0.5, 1.0, 'World Suicide rate')

# Load the religiosity / GDP table and rename its key column to 'Country'
# so it can be merged with df
rel_data = pd.read_csv('data/religios_GDP.csv').rename(columns={"country": "Country"})
rel_data

# Inner join on the country name
df_rel = df.merge(rel_data, on=['Country'])

# Happiness score against religiosity, with a fitted regression line
sns.lmplot(data=df_rel, x='religiousity%', y='Happiness Score');

# Same axes: hue by region, marker size by the 'US$' column
sns.relplot(
    data=df_rel,
    x='religiousity%',
    y='Happiness Score',
    hue='Region',
    size="US$",
    sizes=(5, 400),
    alpha=0.5,
);

from pathlib import Path

import plotly.express as px

# Interactive scatter of happiness against religiosity: colour by region,
# marker size by the 'US$' column, country name shown on hover
fig = px.scatter(
    df_rel,
    x="religiousity%",
    y="Happiness Score",
    color='Region',
    size='US$',
    hover_name='Country',
)
fig.show()

# write_html does not create missing folders, so make sure the output
# directory exists before exporting
Path("plotly").mkdir(parents=True, exist_ok=True)
fig.write_html("plotly/file.html")

from sklearn.preprocessing import MinMaxScaler

# Load the COVID table; the figures live in the column pandas named
# "Unnamed: 3", which is renamed to 'covid'
covid = pd.read_csv('data/covid.csv')
covid = covid.rename(columns={"Unnamed: 3": "covid"})

# Chain dropna() onto the selection instead of calling it in place on a
# column subset of the original frame
covid = covid[['Country', 'covid']].dropna()
covid

# Rename the 2020 report columns so the tables can be merged on 'Country'
y_2020 = y_2020.rename(columns={"Country name": "Country", 'Ladder score': 'Happiness score'})

cov_data = pd.merge(covid, y_2020, on=['Country'])

# The figures are text with thousands separators (e.g. "1,427.73"):
# strip the commas and convert to real numbers instead of leaving the
# string-to-float coercion to the scaler
cov_data['covid'] = cov_data['covid'].str.replace(',', '', regex=False).astype(float)

# Work on an explicit copy: assigning into a bare column selection is what
# triggers SettingWithCopyWarning
test = cov_data[['covid', 'Happiness score']].copy()

# Rescale both columns with MinMaxScaler (default feature range)
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(test)
test.loc[:, :] = scaled_values

C:\Users\zurab\anaconda3\lib\site-packages\pandas\core\indexing.py:1637: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Users\zurab\anaconda3\lib\site-packages\pandas\core\indexing.py:692: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

# Swap the COVID figures for their min-max scaled counterparts
cov_data = cov_data.assign(covid=test['covid'])

# GDP against life-expectancy columns of the 2020 report:
# hue by regional indicator, marker size by the scaled COVID figure
sns.relplot(
    data=cov_data,
    x='Explained by: Log GDP per capita',
    y='Explained by: Healthy life expectancy',
    hue='Regional indicator',
    size="covid",
    sizes=(3, 200),
    alpha=0.5,
);

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# One-hot encode 'Region' so it can be used as a regression input
dum_df = pd.get_dummies(df, columns=["Region"])

# Target is the happiness score; features are all columns except the ones
# listed here
non_feature_columns = [
    "Country",
    'Happiness Rank',
    'Trust (Government Corruption)',
    'Standard Error',
    'Happiness Score',
]
y = dum_df['Happiness Score']
X = dum_df.drop(columns=non_feature_columns)

# Hold out 20% of the rows for testing; the seed is fixed so the split is
# reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=10, test_size=0.2
)

# Ordinary least-squares fit on the training split
model = LinearRegression()
model.fit(X_train, y_train)

LinearRegression()

from sklearn import metrics

# Predict the held-out rows and report the error metrics
y_pred = model.predict(X_test)

mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)

print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))

Mean Absolute Error: 0.09184139883084416
Mean Squared Error: 0.013109356769966943
Root Mean Squared Error: 0.11449609936572924

# Measured vs. predicted values for the held-out rows
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)

# Dashed identity line: points on it are predicted exactly
measured_range = [y_test.min(), y_test.max()]
ax.plot(measured_range, measured_range, 'k--', lw=4)

ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')

# Figure.show() needs a GUI backend and only emits a UserWarning under the
# inline notebook backend; plt.show() works with either
plt.show()

<ipython-input-50-73d758206e49>:6: UserWarning:

Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.

# R² of the fitted model, computed on the training split
r_sq = model.score(X=X_train, y=y_train)
print('coefficient of determination:', r_sq)

coefficient of determination: 0.9940367076549931

# Build the 2016 evaluation table.
# The merge has to use the 2016 tables: merging y_2015 with suicide_2015
# leaves suicide_2016 unused and makes "df_2016" a copy of the 2015 data the
# model was trained on.
# NOTE(review): assumes y_2016 has 'Country' and 'Region' columns and uses the
# 2015 names for the feature columns - confirm against data/2016.csv.
suicide_2016 = suicide[suicide['Year'] == 2016]
df_2016 = pd.merge(y_2016, suicide_2016, on=['Country'])
df_2016.drop(columns=['Year'], inplace=True)
dum_df_2016 = pd.get_dummies(df_2016, columns=["Region"])

# Give the 2016 table exactly the columns (and column order) of the 2015
# design table so the already-fitted model can score it: columns the 2016
# table lacks (e.g. a region dummy with no 2016 rows) are added as 0, and
# columns the 2015 table does not have are dropped.
dum_df_2016 = dum_df_2016.reindex(columns=dum_df.columns, fill_value=0)

dum_df.columns

Index(['Country', 'Happiness Rank', 'Happiness Score', 'Standard Error',
       'Economy (GDP per Capita)', 'Family', 'Health (Life Expectancy)',
       'Freedom', 'Trust (Government Corruption)', 'Generosity',
       'Dystopia Residual', 'suicide', 'Region_Australia and New Zealand',
       'Region_Central and Eastern Europe', 'Region_Eastern Asia',
       'Region_Latin America and Caribbean',
       'Region_Middle East and Northern Africa', 'Region_North America',
       'Region_Southeastern Asia', 'Region_Southern Asia',
       'Region_Sub-Saharan Africa', 'Region_Western Europe'],
      dtype='object')

from sklearn import metrics

# The model fitted earlier is reused as-is (it is not refitted here);
# only the evaluation data changes.
non_feature_columns = [
    "Country",
    'Happiness Rank',
    'Trust (Government Corruption)',
    'Standard Error',
    'Happiness Score',
]
X_test = dum_df_2016.drop(columns=non_feature_columns)
y_test = dum_df_2016['Happiness Score']

# Predict and report the error metrics for this table
y_pred = model.predict(X_test)

mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)

print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))

Mean Absolute Error: 0.0754315865740917
Mean Squared Error: 0.009057826512209627
Root Mean Squared Error: 0.09517261429744182

# Measured vs. predicted happiness score for the df_2016 evaluation table
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.3, c='r', s=100)

# Dashed identity line: points on it are predicted exactly
measured_range = [y_test.min(), y_test.max()]
ax.plot(measured_range, measured_range, 'k--', lw=4)

ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
# y_test holds the 'Happiness Score' column, so title the plot accordingly
ax.set_title('Happiness Score')

# Figure.show() needs a GUI backend and only emits a UserWarning under the
# inline notebook backend; plt.show() works with either
plt.show()

<ipython-input-57-642a7a46ea6b>:7: UserWarning:

Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.

# R² of the fitted model on the training split.
# NOTE(review): this scores X_train/y_train, not the X_test/y_test evaluated
# just above - confirm that the training score is what should be reported here.
r_sq = model.score(X=X_train, y=y_train)
print('coefficient of determination:', r_sq)

coefficient of determination: 0.9940367076549931

	Country	Region	Happiness Rank	Happiness Score	Standard Error	Economy (GDP per Capita)	Family	Health (Life Expectancy)	Freedom	Trust (Government Corruption)	Generosity	Dystopia Residual
0	Switzerland	Western Europe	1	7.587000	0.034110	1.396510	1.349510	0.941430	0.665570	0.419780	0.296780	2.517380
1	Iceland	Western Europe	2	7.561000	0.048840	1.302320	1.402230	0.947840	0.628770	0.141450	0.436300	2.702010
2	Denmark	Western Europe	3	7.527000	0.033280	1.325480	1.360580	0.874640	0.649380	0.483570	0.341390	2.492040
3	Norway	Western Europe	4	7.522000	0.038800	1.459000	1.330950	0.885210	0.669730	0.365030	0.346990	2.465310
4	Canada	North America	5	7.427000	0.035530	1.326290	1.322610	0.905630	0.632970	0.329570	0.458110	2.451760
5	Finland	Western Europe	6	7.406000	0.031400	1.290250	1.318260	0.889110	0.641690	0.413720	0.233510	2.619550
6	Netherlands	Western Europe	7	7.378000	0.027990	1.329440	1.280170	0.892840	0.615760	0.318140	0.476100	2.465700
7	Sweden	Western Europe	8	7.364000	0.031570	1.331710	1.289070	0.910870	0.659800	0.438440	0.362620	2.371190
8	New Zealand	Australia and New Zealand	9	7.286000	0.033710	1.250180	1.319670	0.908370	0.639380	0.429220	0.475010	2.264250
9	Australia	Australia and New Zealand	10	7.284000	0.040830	1.333580	1.309230	0.931560	0.651240	0.356370	0.435620	2.266460

	Country	Region	Happiness Rank	Happiness Score	Standard Error	Economy (GDP per Capita)	Family	Health (Life Expectancy)	Freedom	Trust (Government Corruption)	Generosity	Dystopia Residual	suicide
0	Switzerland	Western Europe	1	7.587	0.03411	1.39651	1.34951	0.94143	0.66557	0.41978	0.29678	2.51738	1.827820
1	Iceland	Western Europe	2	7.561	0.04884	1.30232	1.40223	0.94784	0.62877	0.14145	0.43630	2.70201	1.884182
2	Denmark	Western Europe	3	7.527	0.03328	1.32548	1.36058	0.87464	0.64938	0.48357	0.34139	2.49204	1.281190
3	Norway	Western Europe	4	7.522	0.03880	1.45900	1.33095	0.88521	0.66973	0.36503	0.34699	2.46531	1.429625
4	Canada	North America	5	7.427	0.03553	1.32629	1.32261	0.90563	0.63297	0.32957	0.45811	2.45176	1.753301

	Country	Region	Happiness Rank	Happiness Score	Standard Error	Economy (GDP per Capita)	Family	Health (Life Expectancy)	Freedom	Trust (Government Corruption)	Generosity	Dystopia Residual	suicide
45	South Korea	Eastern Asia	47	5.984	0.04098	1.24461	0.95774	0.96538	0.33208	0.07857	0.18557	2.21978	5.595541
27	Qatar	Middle East and Northern Africa	28	6.611	0.06257	1.69042	1.07860	0.79733	0.64040	0.52208	0.32573	1.55674	3.970951
122	Sri Lanka	Southern Asia	132	4.271	0.03751	0.83524	1.01905	0.70806	0.53726	0.09179	0.40828	0.67108	3.597424
38	Suriname	Latin America and Caribbean	40	6.269	0.09811	0.99534	0.97200	0.60820	0.59657	0.13633	0.16991	2.79094	3.492400
52	Kazakhstan	Central and Eastern Europe	54	5.855	0.04114	1.12254	1.12241	0.64368	0.51649	0.08454	0.11827	2.24729	3.132291
54	Lithuania	Central and Eastern Europe	56	5.833	0.03843	1.14723	1.25745	0.73128	0.21342	0.01031	0.02641	2.44649	2.655972
23	Singapore	Southeastern Asia	24	6.798	0.03780	1.52186	1.02000	1.02525	0.54252	0.49210	0.31105	1.88501	2.641140
93	Mongolia	Eastern Asia	100	4.874	0.03313	0.82819	1.30060	0.60268	0.43626	0.02666	0.33230	1.34759	2.599839
19	United Arab Emirates	Middle East and Northern Africa	20	6.901	0.03729	1.42727	1.12575	0.80925	0.64157	0.38583	0.26428	2.24743	2.540437
36	Taiwan	Eastern Asia	38	6.298	0.03868	1.29098	1.07617	0.87530	0.39740	0.08129	0.25376	2.32323	2.499941

	Unnamed: 0	Country	religiousity%	US$
0	0	Angola	88	4465
1	1	Brazil	79	9895
2	2	Bulgaria	52	8077
3	3	Colombia	82	6379
4	4	Tanzania	89	1034
...	...	...	...	...
143	143	Rwanda	95	772
144	144	Puerto Rico	85	31581
145	145	Qatar	95	61024
146	146	Togo	80	611
147	147	Taiwan	45	24292

	Country	covid
1	Afghanistan	1,427.73
2	Africa	2,846.00
3	Albania	34,422.13
4	Algeria	2,548.70
5	Andorra	138,109.11
...	...	...
194	Vietnam	24.31
195	World	14,249.67
196	Yemen	72.29
197	Zambia	4,019.37
198	Zimbabwe	2,406.46

Happiness Index (Introduction)¶

World Happiness Index Report¶

Data Exploration¶

Suicide Cases¶

2015-2020 პერიოდის ანალიზი¶

ვნახოთ როგორ იცვლებოდა ჯანმრთელობის ინდექსი წლების განმავლობაში¶

ბედნიერების ინდექსის ცვლილება :¶

Geopandas-ს გამოყენებით რუკაზე მონაცემების გამოსახვა¶

რელიგიურობა¶

Covid-ის მონაცემები 1000 000 ადამიანზე¶

წრფივი რეგრესიის მოდელი¶

	Country	Year	suicide
25	Afghanistan	2015	0.666678
53	Albania	2015	0.786902
81	Algeria	2015	1.048141
109	American Samoa	2015	1.086531
137	Andean Latin America	2015	1.128180

	FID	ISO	COUNTRYAFF	AFF_ISO	SHAPE_Leng	SHAPE_Area	geometry	Region	Happiness Rank	Happiness Score	Standard Error	Economy (GDP per Capita)	Family	Health (Life Expectancy)	Freedom	Trust (Government Corruption)	Generosity	Dystopia Residual	suicide
COUNTRY
American Samoa	1	AS	United States	US	0.600124	0.013720	POLYGON ((-170.74390 -14.37555, -170.74942 -14...	NaN	NaN	6.886	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
United States Minor Outlying Islands	2	UM	United States	US	0.480216	0.003216	MULTIPOLYGON (((-160.02114 -0.39805, -160.0281...	NaN	NaN	6.886	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
Cook Islands	3	CK	New Zealand	NZ	0.980664	0.013073	MULTIPOLYGON (((-159.74698 -21.25667, -159.793...	NaN	NaN	7.324	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
French Polynesia	4	PF	France	FR	3.930211	0.175332	MULTIPOLYGON (((-149.17920 -17.87084, -149.258...	NaN	NaN	6.489	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
Niue	5	NU	New Zealand	NZ	0.541413	0.021414	POLYGON ((-169.89389 -19.14556, -169.93088 -19...	NaN	NaN	7.324	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN