In this notebook, we create some dataframes from cleaned datasets to support data visualization.¶

from pandas import DataFrame
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# list of countries
country_names = ["Australia","Canada","France","Germany","Italy","Japan","South Korea","Mexico","United Kingdom",
                 "United States", "Brazil","India","Indonesia","Russia","South Africa","Turkey","China","Argentina","Saudi Arabia"]

years = ["2011","2012","2013","2014","2015","2016","total"] # get list of years

fields = ["Military","Health","Education"] # list of categories

Total Expenditure by Military, Health, Education¶

military_total = pd.read_csv("../data/cleaned data/military_total.csv") # read csv files
health_total = pd.read_csv("../data/cleaned data/health_total.csv")
education_total = pd.read_csv("../data/cleaned data/education_total.csv")

total_spend = pd.DataFrame(index=np.arange(len(years)*len(country_names)),
                           columns = ["Country","Year","Military","Health","Education"]) # create a dataframe

# retrieve the values for the dataframe from the 3 datasets above
for i in range (0, len(country_names)*len(years), len(country_names)):
    for j in range(0, len(country_names)):
        k = int(i/len(country_names))
        total_spend.ix[i + j, "Year"] = years[k]
        total_spend.ix[i + j, "Country"] = country_names[j]
        
        total_spend.ix[i + j, "Military"] = float(military_total[military_total.Country == country_names[j]][years[k]])
        
        total_spend.ix[i + j, "Health"] = float(health_total[health_total.Country == country_names[j]][years[k]])
        
        total_spend.ix[i + j, "Education"] = float(education_total[education_total.Country == country_names[j]][years[k]])

total_spend.set_index(['Country','Year'], inplace = True) # set index
total_spend = total_spend / (10e+9) # change number format, in this case we want spending amount as billions 
total_spend.reset_index(drop=False, inplace=True)

for i in ['Military','Health','Education']:
    total_spend[i] = total_spend[i].apply(lambda x: round(x, 2)) # round to two digits after decimal

total_spend.to_csv(path_or_buf="../data/datasets/total_spend.csv", index = False) # save dataframe without index

years = ["2011","2012","2013","2014","2015","2016"] # a new list of years excluding total

Absolute Expenditure Growth¶

# create a dataframe with defined columns and number of rows
total_spend_grow = pd.DataFrame(index=np.arange(len(years)*len(fields)),
                           columns = ["Field","Year","Australia","Canada","France","Germany","Italy","Japan","South Korea","Mexico","United Kingdom","United States",
                "Brazil","India","Indonesia","Russia","South Africa","Turkey","China","Argentina","Saudi Arabia"])

# get the values for the dataframe
for i in range (0, len(fields)*len(years), len(years)):
    for j in range(0, len(years)):
        k = int(i/len(years))
        total_spend_grow.ix[i + j, "Field"] = fields[k]
        total_spend_grow.ix[i + j, "Year"] = years[j]
        for name in country_names:
            if total_spend_grow.ix[i + j, "Field"] == "Military":
                total_spend_grow.ix[i + j, name] = float(military_total[military_total.Country == name][years[j]])
            elif total_spend_grow.ix[i + j, "Field"] == "Health":
                total_spend_grow.ix[i + j, name] = float(health_total[military_total.Country == name][years[j]])
            else:
                total_spend_grow.ix[i + j, name] = float(education_total[military_total.Country == name][years[j]])

total_spend_grow.set_index(['Field','Year'], inplace = True)
total_spend_grow = total_spend_grow / (10e+9)  # change format to billions
total_spend_grow.reset_index(drop=False, inplace=True)
for i in country_names:
    total_spend_grow[i] = total_spend_grow[i].apply(lambda x: round(x, 2)) # round to two digits after decimal

total_spend_grow.to_csv(path_or_buf="../data/datasets/total_spend_grow.csv", index = False)

Share of GDP¶

military_pct = pd.read_csv("../data/cleaned data/military_pct.csv")
health_pct = pd.read_csv("../data/cleaned data/health_pct.csv")
education_pct = pd.read_csv("../data/cleaned data/education_pct.csv")

shareGDP = pd.DataFrame(index=np.arange(len(years)*len(country_names)),
                           columns = ["Country","Year","Military","Health","Education"])

for i in range (0, len(country_names)*len(years), len(country_names)):
    for j in range(0, len(country_names)):
        k = int(i/len(country_names))
        shareGDP.ix[i + j, "Year"] = years[k]
        shareGDP.ix[i + j, "Country"] = country_names[j]
        
        shareGDP.ix[i + j, "Military"] = float(military_pct[military_pct.Country == country_names[j]][years[k]])
        
        shareGDP.ix[i + j, "Health"] = float(health_pct[health_pct.Country == country_names[j]][years[k]])
        
        shareGDP.ix[i + j, "Education"] = float(education_pct[education_pct.Country == country_names[j]][years[k]])

# get another column as average share of GDP for all 3 categories 
# row without education data will be ignored when calculating
shareGDP["Average"] = shareGDP[["Military","Health","Education"]].mean(1)

shareGDP.set_index(['Country','Year'], inplace = True)
shareGDP = shareGDP * 100
for i in shareGDP.columns:
    shareGDP[i] = shareGDP[i].apply(lambda x: round(x, 2)) # when shown as % there will be 2 digits after decimal

shareGDP.reset_index(drop=False, inplace=True)

shareGDP.to_csv(path_or_buf="../data/datasets/shareGDP.csv", index = False)

Expenditure Per Capita¶

military_pp = pd.read_csv("../data/cleaned data/military_pp.csv")
health_pp = pd.read_csv("../data/cleaned data/health_pp.csv")
education_pp = pd.read_csv("../data/cleaned data/education_pp.csv")

capita_spend = pd.DataFrame(index=np.arange(len(years)*len(country_names)),
                           columns = ["Country","Year","Military","Health","Education"])

for i in range (0, len(country_names)*len(years), len(country_names)):
    for j in range(0, len(country_names)):
        k = int(i/len(country_names))
        capita_spend.ix[i + j, "Year"] = years[k]
        capita_spend.ix[i + j, "Country"] = country_names[j]
        
        capita_spend.ix[i + j, "Military"] = float(military_pp[military_pp.Country == country_names[j]][years[k]])
        
        capita_spend.ix[i + j, "Health"] = float(health_pp[health_pp.Country == country_names[j]][years[k]])
        
        capita_spend.ix[i + j, "Education"] = float(education_pp[education_pp.Country == country_names[j]][years[k]])

for i in ['Military','Health','Education']:
    capita_spend[i] = capita_spend[i].apply(lambda x: round(x, 2))

capita_spend.to_csv(path_or_buf="../data/datasets/capita_spend.csv", index = False)

Expenditure Change¶

# read csv and save to dataframe
military_change = pd.read_csv("../data/cleaned data/military_change.csv")
health_change = pd.read_csv("../data/cleaned data/health_change.csv")
education_change = pd.read_csv("../data/cleaned data/education_change.csv")

military_change['Field'] = "Military"   # adding another column that specify the field in each dataframe
health_change['Field'] = "Health"
education_change['Field'] = "Education"

total_change = pd.concat([military_change,health_change,education_change]) # combine 3 dataframes into one

total_change.set_index(['Country','Field'], inplace = True)
total_change = total_change/ (10e+6)  # format spending amount to millions
for i in total_change.columns:
    total_change[i] = total_change[i].apply(lambda x: round(x, 2)) # round to 2 digits after decimal

total_change.reset_index(drop=False, inplace=True)

total_change.to_csv(path_or_buf="../data/datasets/total_change.csv", index = False)

# in the next parts, we use similar methods to create datasets for visualization

Pecentage Change¶

military_change_pct = pd.read_csv("../data/cleaned data/military_change_pct.csv")
health_change_pct = pd.read_csv("../data/cleaned data/health_change_pct.csv")
education_change_pct = pd.read_csv("../data/cleaned data/education_change_pct.csv")

military_change_pct['Field'] = "Military"
health_change_pct['Field'] = "Health"
education_change_pct['Field'] = "Education"

pct_change = pd.concat([military_change_pct,health_change_pct,education_change_pct])

pct_change.set_index(['Country','Field'], inplace = True)
for i in pct_change.columns:
    pct_change[i] = pct_change[i].apply(lambda x: round(x, 4))

pct_change.reset_index(drop=False, inplace=True)

pct_change.to_csv(path_or_buf="../data/datasets/pct_change.csv", index = False)

Total Expenditure by Years¶

military_total['Field'] = "Military"
health_total['Field'] = "Health"
education_total['Field'] = "Education"

total_spend_trend = pd.concat([military_total,health_total,education_total])

total_spend_trend.set_index(['Country','Field'], inplace = True)
total_spend_trend = total_spend_trend/ (10e+9)
for i in total_spend_trend.columns:
    total_spend_trend[i] = total_spend_trend[i].apply(lambda x: round(x, 2))

total_spend_trend.reset_index(drop=False, inplace=True)

total_spend_trend.to_csv(path_or_buf="../data/datasets/total_spend_trend.csv", index = False)

Expenditure per Capita by Years¶

military_pp['Field'] = "Military"
health_pp['Field'] = "Health"
education_pp['Field'] = "Education"

capita_spend_trend = pd.concat([military_pp,health_pp,education_pp])

capita_spend_trend.set_index(['Country','Field'], inplace = True)
for i in capita_spend_trend.columns:
    capita_spend_trend[i] = capita_spend_trend[i].apply(lambda x: round(x, 2))

capita_spend_trend.reset_index(drop=False, inplace=True)

capita_spend_trend.to_csv(path_or_buf="../data/datasets/capita_spend_trend.csv", index = False)

capita_spend_grow1 = pd.DataFrame(index=np.arange(len(years)*len(country_names)),
                           columns = ["Country","Year","Amount"])
capita_spend_grow2 = pd.DataFrame(index=np.arange(len(years)*len(country_names)),
                           columns = ["Country","Year","Amount"])
capita_spend_grow3 = pd.DataFrame(index=np.arange(len(years)*len(country_names)),
                           columns = ["Country","Year","Amount"])

for i in range (0, len(country_names)*len(years), len(country_names)):
    for j in range(0, len(country_names)):
        k = int(i/len(country_names))
        capita_spend_grow1.ix[i + j, "Year"] = years[k]
        capita_spend_grow1.ix[i + j, "Country"] = country_names[j]
        
        capita_spend_grow1.ix[i + j, "Amount"] = float(military_pp[military_pp.Country == country_names[j]][years[k]])
        
for i in range (0, len(country_names)*len(years), len(country_names)):
    for j in range(0, len(country_names)):
        k = int(i/len(country_names))
        capita_spend_grow2.ix[i + j, "Year"] = years[k]
        capita_spend_grow2.ix[i + j, "Country"] = country_names[j]
        
        capita_spend_grow2.ix[i + j, "Amount"] = float(health_pp[health_pp.Country == country_names[j]][years[k]])
        
        
        
for i in range (0, len(country_names)*len(years), len(country_names)):
    for j in range(0, len(country_names)):
        k = int(i/len(country_names))
        capita_spend_grow3.ix[i + j, "Year"] = years[k]
        capita_spend_grow3.ix[i + j, "Country"] = country_names[j]
        
        capita_spend_grow3.ix[i + j, "Amount"] = float(education_pp[education_pp.Country == country_names[j]][years[k]])

capita_spend_grow1["Field"] = "Military"
capita_spend_grow2["Field"] = "Health"
capita_spend_grow3["Field"] = "Education"

capita_spend_grow = pd.concat([capita_spend_grow1, capita_spend_grow2, capita_spend_grow3])

capita_spend_grow.to_csv(path_or_buf="../data/datasets/capita_spend_grow.csv", index = False)