In [1]:
from pandas import DataFrame
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings("ignore")
In [2]:
# Three-letter country codes of the 19 economies analysed; read_data uses
# them as the row labels to keep from each raw table.
G20_codes = [
    "AUS", "CAN", "FRA", "DEU", "ITA",
    "JPN", "KOR", "MEX", "GBR", "USA",
    "BRA", "IND", "IDN", "RUS", "ZAF",
    "TUR", "CHN", "ARG", "SAU",
]
# Column labels (strings, not ints) for the study period 2011-2016.
years = list(map(str, range(2011, 2017)))
In [3]:
# Display names for the exported tables, listed in the same order as
# G20_codes (entry i here names the country whose code is G20_codes[i]).
country_names = [
    "Australia",
    "Canada",
    "France",
    "Germany",
    "Italy",
    "Japan",
    "South Korea",
    "Mexico",
    "United Kingdom",
    "United States",
    "Brazil",
    "India",
    "Indonesia",
    "Russia",
    "South Africa",
    "Turkey",
    "China",
    "Argentina",
    "Saudi Arabia",
]
In [4]:
def read_data(data_path, header, index):
    """Load a raw CSV and keep only the G20 rows and the study-year columns.

    Parameters
    ----------
    data_path : path or buffer
        Forwarded to ``pd.read_csv``.
    header : int
        Row number holding the column names (``header=`` of ``read_csv``).
    index : int
        Column used as the row labels (``index_col=`` of ``read_csv``).

    Returns
    -------
    DataFrame
        Rows selected by ``G20_codes`` and columns selected by ``years``,
        in the order those lists define.
    """
    raw_table = pd.read_csv(data_path, index_col=index, header=header)
    return raw_table.loc[G20_codes, years]
    
In [5]:
def change_abs(df):
    """Year-over-year absolute change for 2012-2016, plus its row-wise mean.

    Parameters
    ----------
    df : DataFrame
        Must contain the string-labelled columns "2011" through "2016";
        any other columns are ignored.

    Returns
    -------
    DataFrame
        Columns "2012".."2016", each holding that year's value minus the
        previous year's, followed by "Annual Change", the mean of those five
        differences for each row.
    """
    later_years = ["2012", "2013", "2014", "2015", "2016"]
    earlier_years = ["2011", "2012", "2013", "2014", "2015"]

    current = df.loc[:, later_years]
    previous = df.loc[:, earlier_years]
    # Relabel the earlier years so the subtraction pairs each year with the
    # one directly before it.
    previous.columns = later_years

    yearly_delta = current - previous
    yearly_delta["Annual Change"] = yearly_delta[later_years].mean(axis=1)
    return yearly_delta

def change_pct(df):
    """Year-over-year relative change for 2012-2016.

    Parameters
    ----------
    df : DataFrame
        Must contain the string-labelled columns "2011" through "2016";
        any other columns are ignored.

    Returns
    -------
    DataFrame
        Columns "2012".."2016"; each cell is (value - previous year's value)
        divided by the previous year's value, i.e. a fraction, not a percent.
    """
    target_years = ["2012", "2013", "2014", "2015", "2016"]
    base_years = ["2011", "2012", "2013", "2014", "2015"]

    base = df.loc[:, base_years]
    # Relabel so each base year lines up with the year that follows it.
    base.columns = target_years

    return (df.loc[:, target_years] - base) / base
    

GDP

In [6]:
# Raw GDP table, restricted by read_data to the G20 rows and study years.
# header=2: column names are on the file's third line; index=1: its second
# column supplies the row labels.
GDP_total = read_data("../data/raw data/GDP.csv", header=2, index=1)
In [7]:
# Second GDP table ("pp" presumably means per person -- confirm against the
# raw file), loaded with the same layout as the other raw tables.
GDP_pp = read_data("../data/raw data/GDPpp.csv", header=2, index=1)

Military Expenditure

In [8]:
military_total = read_data("../data/raw data/Military.csv", header=2, index=1)
# Row-wise sum over the study years, kept as an extra "total" column.
military_total["total"] = military_total[years].sum(axis=1)
In [9]:
# The raw figures are divided by 100 so they can be used as multipliers
# (presumably the file stores percent of GDP -- confirm against the source).
military_pct = read_data("../data/raw data/Military%GDP.csv", header=2, index=1) / 100
In [10]:
# Element-wise product of the GDP_pp table and the military share.
military_pp = GDP_pp.mul(military_pct)
In [11]:
# Year-over-year absolute differences (2012-2016) and their mean.
military_change = change_abs(df=military_total)
In [12]:
# Year-over-year relative differences (2012-2016), as fractions.
military_change_pct = change_pct(df=military_total)

Education Expenditure

In [13]:
# The raw figures are divided by 100 so they can be used as multipliers
# (presumably the file stores percent of GDP -- confirm against the source).
education_pct = read_data("../data/raw data/Education%GDP.csv", header=2, index=1) / 100
In [14]:
# Element-wise product of the GDP table and the education share.
education_total = GDP_total.mul(education_pct)
# Row-wise sum over the study years, kept as an extra "total" column.
education_total["total"] = education_total[years].sum(axis=1)
In [15]:
# Element-wise product of the GDP_pp table and the education share.
education_pp = GDP_pp.mul(education_pct)
In [16]:
# Year-over-year absolute differences (2012-2016) and their mean.
education_change = change_abs(df=education_total)
In [17]:
# Year-over-year relative differences (2012-2016), as fractions.
education_change_pct = change_pct(df=education_total)

Healthcare Expenditure

In [18]:
# The raw figures are divided by 100 so they can be used as multipliers
# (presumably the file stores percent of GDP -- confirm against the source).
health_pct = read_data("../data/raw data/Health%GDP.csv", header=2, index=1) / 100
In [19]:
# Unlike the military and education "pp" tables, this one is read straight
# from a raw file rather than derived from GDP_pp.
health_pp = read_data("../data/raw data/HealthPP.csv", header=2, index=1)
In [20]:
# Element-wise product of the GDP table and the health share.
health_total = GDP_total.mul(health_pct)
# Row-wise sum over the study years, kept as an extra "total" column.
health_total["total"] = health_total[years].sum(axis=1)
In [21]:
# Year-over-year absolute differences (2012-2016) and their mean.
health_change = change_abs(df=health_total)
In [22]:
# Year-over-year relative differences (2012-2016), as fractions.
health_change_pct = change_pct(df=health_total)

Create cleaned data

In [23]:
def update(df, codes=None, names=None):
    """Replace the country-code index with a "Country" column of display names.

    The frame is modified in place and also returned, so existing callers
    that rely on either the mutation or the return value keep working.

    Parameters
    ----------
    df : DataFrame
        Table whose (single-level) index holds country codes.
    codes, names : sequence of str, optional
        Parallel sequences giving the code -> display-name mapping.  They
        default to the notebook-level ``G20_codes`` and ``country_names``.

    Returns
    -------
    DataFrame
        The same ``df`` object, now with a default integer index and
        "Country" as its first column.
    """
    if codes is None:
        codes = G20_codes
    if names is None:
        names = country_names
    name_by_code = dict(zip(codes, names))

    df.index.names = ["Country"]
    df.reset_index(drop=False, inplace=True)
    # Look each name up by its code instead of writing by row position: the
    # positional ``DataFrame.ix`` indexer used previously no longer exists in
    # pandas >= 1.0, and a lookup stays correct whatever the row order or
    # row count.  Codes without a known name are left unchanged.
    df["Country"] = [name_by_code.get(code, code) for code in df["Country"]]
    return df
In [24]:
# Every table to export, keyed by the name used for its output CSV file.
dict_df = dict(
    GDP_total=GDP_total,
    GDP_pp=GDP_pp,
    military_total=military_total,
    military_pp=military_pp,
    military_pct=military_pct,
    military_change=military_change,
    military_change_pct=military_change_pct,
    health_total=health_total,
    health_pp=health_pp,
    health_pct=health_pct,
    health_change=health_change,
    health_change_pct=health_change_pct,
    education_total=education_total,
    education_pp=education_pp,
    education_pct=education_pct,
    education_change=education_change,
    education_change_pct=education_change_pct,
)
In [25]:
# Give each table a readable "Country" column, store the result back in the
# dict, and write it to the cleaned-data folder without the integer index.
for name, df in dict_df.items():
    df = update(df)
    dict_df[name] = df
    df.to_csv(path_or_buf="../data/cleaned data/" + name + ".csv", index=False)