Assignment 2 (In Progress)

October 2nd, 2025

The following visualizations use data from the Happy Planet Index (source: "Explore the data - Happy Planet Index").

Basic Plot of Life Expectancy vs Carbon Footprint

### Basic Scatter Plot for Life Expectancy vs Carbon Footprint
# Load the readxl library and import the HPI dataset


library(readxl)

Warning: package 'readxl' was built under R version 4.5.2

# Import cells A9:L158 of the "1. All countries" sheet from the HPI workbook.
HPI_dataset <- read_excel(
  path = "C:/Users/marti/OneDrive/Desktop/test/HPI_dataset.xlsx",
  sheet = "1. All countries",
  range = "A9:L158"
)

New names:
• `` -> `...4`

# Work on a copy named `hpi` so the imported object stays untouched.
hpi <- HPI_dataset

# View() pops up a data-viewer window, which is only useful in a live session;
# skip it when the document is rendered non-interactively.
if (interactive()) {
  View(hpi)
}

# List the column names exactly as they were read from the sheet.
names(hpi)

 [1] "HPI rank"                          "Country"                          
 [3] "ISO"                               "...4"                             
 [5] "Continent"                         "Population (thousands)"           
 [7] "Life Expectancy (years)"           "Ladder of life (Wellbeing) (0-10)"
 [9] "Carbon Footprint (tCO2e)"          "HPI"                              
[11] "CO2 threshold for year  (tCO2e)"   "GDP per capita ($)"

## Basic scatter plot, built from base graphics
# (Run names(hpi) for a quick look at the variable names.)

# Turn the column names into syntactically valid R names (optional, but it
# makes them easier to reference).
names(hpi) <- make.names(names(hpi))

# Pull out the two variables to compare; adjust a name if yours is slightly
# different.
x <- hpi[["Life.Expectancy..years."]]
y <- hpi[["Carbon.Footprint..tCO2e."]]

# Optional sanity check: head(cbind(x, y))

# Draw the scatterplot: solid sky-blue dots, slightly enlarged.
plot(
  x = x,
  y = y,
  pch = 19,
  cex = 1.3,
  col = "skyblue",
  xlab = "Life Expectancy (years)",
  ylab = "Carbon Footprint (tCO₂e)",
  main = "Life Expectancy vs Carbon Footprint"
)

# Exercise: Can you generate these charts individually?  Try these functions 
# using another dataset. Be sure to work on the layout and margins

Scatterplot

# Scatterplot
# Note the incremental additions
# Load the readxl library and import the HPI dataset
library(readxl)

# Re-import the same sheet and cell range so this section runs on its own.
HPI_dataset <- read_excel(
  path = "C:/Users/marti/OneDrive/Desktop/test/HPI_dataset.xlsx",
  sheet = "1. All countries",
  range = "A9:L158"
)

New names:
• `` -> `...4`

# Work on a copy named `hpi`.
hpi <- HPI_dataset
# View() pops up a data-viewer window; only do that in a live session, not
# while the document is being rendered.
if (interactive()) {
  View(hpi)
}
# List the column names as read from the sheet.
names(hpi)

 [1] "HPI rank"                          "Country"                          
 [3] "ISO"                               "...4"                             
 [5] "Continent"                         "Population (thousands)"           
 [7] "Life Expectancy (years)"           "Ladder of life (Wellbeing) (0-10)"
 [9] "Carbon Footprint (tCO2e)"          "HPI"                              
[11] "CO2 threshold for year  (tCO2e)"   "GDP per capita ($)"

# Convert the column names to syntactically valid R names.
colnames(hpi) <- make.names(colnames(hpi))

x  <- hpi$HPI.rank                  # HPI rank, compared against two other measures
y1 <- hpi$Carbon.Footprint..tCO2e.  # series 1: carbon footprint
y2 <- hpi$Life.Expectancy..years.   # series 2: life expectancy

accent <- "chartreuse2"
series_cols <- c(carbon = "cyan1", life = "violetred2")

# Label orientation, margins c(bottom, left, top, right), text size and
# background. The foreground colours are set here, BEFORE anything is drawn,
# so the axis lines and tick marks also use the accent colour instead of the
# default foreground on the dark background. The previous settings are kept
# in `old_par` and restored at the end.
old_par <- par(las = 1, mar = c(4, 4, 2, 4), cex = 0.7, bg = "grey18",
               col = accent, fg = accent, col.axis = accent)
plot.new()

# One coordinate system: x spans the ranks, y spans both series together.
plot.window(range(x, na.rm = TRUE), range(c(y1, y2), na.rm = TRUE))

# The first argument of axis() is the side: 1 = bottom, 2 = left, 4 = right.
# X-axis based on HPI rank
axis(1, at = seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), by = 20),
     col.axis = accent)

# Both series share the same y scale, so the left and right axes carry the
# same tick positions.
axis(2, at = seq(0, 90, by = 10), col.axis = accent)
axis(4, at = seq(0, 90, by = 10), col.axis = accent)

lines(x, y1, col = series_cols[["carbon"]], lwd = 2)
lines(x, y2, col = series_cols[["life"]], lwd = 2)
# Different plotting symbols (pch) distinguish the series; cex stays at 2.
points(x, y1, pch = 18, cex = 2, col = series_cols[["carbon"]])
# pch = 20 is a solid dot; `bg` only fills symbols 21:25, so it is not passed.
points(x, y2, pch = 20, cex = 2, col = series_cols[["life"]])

box(bty = "u")
mtext("HPI Rank", side = 1, line = 2, cex = 0.8)
# Each y-axis title is drawn in its series colour so the two lines can be
# told apart.
mtext("Carbon Footprint (tCO2e)", side = 2, line = 2, las = 0, cex = 0.8,
      col = series_cols[["carbon"]])
mtext("Life Expectancy (years)", side = 4, line = 2, las = 0, cex = 0.8,
      col = series_cols[["life"]])

# Put the graphics parameters back the way they were before this plot.
par(old_par)

#I was not really happy with how this turned out, I may redo this again with different variables.

Histogram

# Histogram of Population
library(readxl)

# Import cells A9:L158 of the "1. All countries" sheet.
HPI_dataset <- read_excel(
  path = "C:/Users/marti/OneDrive/Desktop/test/HPI_dataset.xlsx",
  sheet = "1. All countries",
  range = "A9:L158"
)

New names:
• `` -> `...4`

hpi <- HPI_dataset

# Population column (in thousands, per its header), with missing values dropped.
Y <- hpi$`Population (thousands)`
Y <- na.omit(Y)

# Use log10 scale because populations vary drastically
Y_log <- log10(Y)

# Dark background with green text; `fg` is set as well so the axis lines and
# tick marks are drawn in the same green rather than the default foreground.
par(bg = "grey18",
    mar = c(4.5, 4.5, 3.1, 1),
    fg = "chartreuse2",
    col.axis = "chartreuse2",
    col.lab = "chartreuse2",
    col.main = "chartreuse2")

# freq = FALSE plots densities so the density curve below shares the y scale.
# The x-axis label states the log10 transform, because the plotted values are
# log10(population), not raw population.
hist(Y_log,
     breaks = 15,
     col = "deepskyblue3",
     border = "white",
     freq = FALSE,
     main = "Distribution of Country Populations",
     xlab = "log10(Population in thousands)",
     ylab = "Density")

# Overlay a kernel density estimate and a rug of the individual observations.
lines(density(Y_log), col = "chartreuse2", lwd = 2)
rug(Y_log, col = "white")

# I am not yet satisfied with this, may go back and redo.

Barplot

library(readxl)

# Import cells A9:L158 of the "1. All countries" sheet for the barplot.
HPI_dataset <- read_excel(
  path = "C:/Users/marti/OneDrive/Desktop/test/HPI_dataset.xlsx",
  sheet = "1. All countries",
  range = "A9:L158"
)

New names:
• `` -> `...4`

hpi <- HPI_dataset

# Convert the headers into syntactically valid R names
colnames(hpi) <- make.names(colnames(hpi))

# Print the converted names (this shows what the GDP column is now called)
print(colnames(hpi))

 [1] "HPI.rank"                          "Country"                          
 [3] "ISO"                               "...4"                             
 [5] "Continent"                         "Population..thousands."           
 [7] "Life.Expectancy..years."           "Ladder.of.life..Wellbeing...0.10."
 [9] "Carbon.Footprint..tCO2e."          "HPI"                              
[11] "CO2.threshold.for.year...tCO2e."   "GDP.per.capita...."

# Mean GDP per capita for every continent code
gdp_by_continent <- aggregate(
  GDP.per.capita.... ~ Continent,
  data = hpi,
  FUN = mean,
  na.rm = TRUE
)

# Shorter column names for the summary table
colnames(gdp_by_continent) <- c("Continent", "GDP.per.capita")

# Lookup table: continent code (as text) -> display name
continent_names <- setNames(
  c(
    "Latin America",
    "N. America & Oceania",
    "Western Europe",
    "Middle East",
    "Africa",
    "South Asia",
    "Eastern Europe & Central Asia",
    "East Asia"
  ),
  as.character(1:8)
)

# Swap each code for its display name
continent_codes <- as.character(gdp_by_continent$Continent)
gdp_by_continent$Continent <- continent_names[continent_codes]

# Highest average GDP first
gdp_rank <- order(gdp_by_continent$GDP.per.capita, decreasing = TRUE)
gdp_by_continent <- gdp_by_continent[gdp_rank, ]

# ---- Barplot ----
# Large bottom margin leaves room for the vertical continent labels.
par(mar = c(8, 4, 3, 2))
bar_shades <- gray(seq(0.3, 0.8, length.out = nrow(gdp_by_continent)))
midpts <- barplot(
  gdp_by_continent$GDP.per.capita,
  names.arg = gdp_by_continent$Continent,
  col = bar_shades,
  main = "Average GDP per Capita by Continent",
  ylab = "Avg GDP per Capita (USD)",
  ylim = c(0, max(gdp_by_continent$GDP.per.capita) * 1.1),
  las = 2  # labels perpendicular to the axes
)

# Write each rounded value at half the bar height, i.e. inside the bar;
# barplot() returned the bar midpoints used as x positions.
text(
  x = midpts,
  y = gdp_by_continent$GDP.per.capita / 2,
  labels = round(gdp_by_continent$GDP.per.capita, 0),
  col = "white",
  cex = 0.8
)

Boxplot

library(readxl)

# --- Load Data ---
# Cells A9:L158 of the "1. All countries" sheet.
HPI_dataset <- read_excel(
  path = "C:/Users/marti/OneDrive/Desktop/test/HPI_dataset.xlsx",
  sheet = "1. All countries",
  range = "A9:L158"
)

New names:
• `` -> `...4`

hpi <- HPI_dataset

# Convert the headers into syntactically valid R names
colnames(hpi) <- make.names(colnames(hpi))

# Lookup table: continent code (as text) -> abbreviated display name
continent_names <- setNames(
  c(
    "Latin America",
    "N. America & Oceania",
    "W. Europe",
    "Middle East",
    "Africa",
    "S. Asia",
    "E. Europe & C. Asia",
    "E. Asia"
  ),
  as.character(1:8)
)

# Overwrite the numeric codes with their display names
hpi$Continent <- continent_names[as.character(hpi$Continent)]

# --- Boxplot: Life Expectancy by Continent ---
# Large bottom margin leaves room for the rotated group labels.
par(mar = c(8, 4, 3, 2))

# One grey shade per distinct continent value
n_groups <- length(unique(hpi$Continent))
box_shades <- gray(seq(0.3, 0.8, length.out = n_groups))

boxplot(
  Life.Expectancy..years. ~ Continent,
  data = hpi,
  col = box_shades,
  main = "Life Expectancy by Continent",
  ylab = "Life Expectancy (Years)",
  xlab = "",
  las = 2,         # labels perpendicular to the axes
  outline = TRUE   # draw outlying points
)

Persp

# Load data
library(readxl)

# This section reads straight into `hpi` (no intermediate HPI_dataset object).
hpi <- read_excel(
  path = "C:/Users/marti/OneDrive/Desktop/test/HPI_dataset.xlsx",
  sheet = "1. All countries",
  range = "A9:L158"
)

New names:
• `` -> `...4`

# Make R-safe column names
names(hpi) <- make.names(names(hpi))

# Pick variables: two predictors (x, y) and the response (z)
x <- hpi$GDP.per.capita....
y <- hpi$Carbon.Footprint..tCO2e.
z <- hpi$Life.Expectancy..years.

# Keep only rows where all three values are present
complete_data <- na.omit(data.frame(x, y, z))

# Define a 30 x 30 grid spanning the observed x and y ranges for persp()
x_seq <- seq(min(complete_data$x), max(complete_data$x), length.out = 30)
y_seq <- seq(min(complete_data$y), max(complete_data$y), length.out = 30)

# Fit the linear model once, outside the grid callback. The surface drawn below
# is this fitted regression plane (z ~ x + y), not an interpolation of the
# raw points.
model <- lm(z ~ x + y, data = complete_data)

# Evaluate the fitted plane at every grid node
z_grid <- outer(x_seq, y_seq, function(a, b) {
  predict(model, newdata = data.frame(x = a, y = b))
})

# Fallback: replace any NA prediction with the smallest observed z so persp()
# always receives a complete matrix
z_grid[is.na(z_grid)] <- min(complete_data$z, na.rm = TRUE)

# Plot
par(bg = "white", mar = c(1, 1, 1, 1))
persp(x_seq, y_seq, z_grid,
      theta = 30, phi = 25,   # viewing angles
      expand = 0.6,
      col = "skyblue3",
      shade = 0.5,
      xlab = "GDP per Capita",
      # y is the Carbon Footprint column, so label it as such
      ylab = "Carbon Footprint (tCO2e)",
      zlab = "Life Expectancy (years)",
      ticktype = "detailed")

Piechart

library(readxl)

# Load the data: cells A9:L158 of the "1. All countries" sheet
HPI_dataset <- read_excel(
  path = "C:/Users/marti/OneDrive/Desktop/test/HPI_dataset.xlsx",
  sheet = "1. All countries",
  range = "A9:L158"
)

New names:
• `` -> `...4`

hpi <- HPI_dataset
# Convert the headers into syntactically valid R names
colnames(hpi) <- make.names(colnames(hpi))

# Lookup table: continent code (as text) -> abbreviated display name
continent_names <- setNames(
  c(
    "Latin America",
    "N. America & Oceania",
    "W. Europe",
    "Middle East",
    "Africa",
    "S. Asia",
    "E. Europe & C. Asia",
    "E. Asia"
  ),
  as.character(1:8)
)

# Overwrite the numeric codes with their display names
hpi$Continent <- continent_names[as.character(hpi$Continent)]

# Add up the carbon-footprint column within each continent.
# NOTE(review): this sums one value per country; if the column is a per-capita
# figure, the result is not a true continent total - confirm against the data
# source before relying on the chart title.
carbon_by_continent <- aggregate(
  Carbon.Footprint..tCO2e. ~ Continent,
  data = hpi,
  FUN = sum,
  na.rm = TRUE
)

# Largest slice first, for cleaner labelling
slice_order <- order(carbon_by_continent$Carbon.Footprint..tCO2e.,
                     decreasing = TRUE)
carbon_by_continent <- carbon_by_continent[slice_order, ]

# Named vector for pie(): values are the sums, names are the continents
pie.values <- setNames(
  carbon_by_continent$Carbon.Footprint..tCO2e.,
  carbon_by_continent$Continent
)

# Pie chart: tight margins, and xpd = TRUE so labels may extend past the plot
# region without being clipped
par(mar = c(0, 2, 1, 2), xpd = TRUE)
slice_shades <- gray(seq(0.3, 1.0, length.out = length(pie.values)))
pie(
  pie.values,
  col = slice_shades,
  main = "Total Carbon Footprint by Continent (tCO₂e)",
  clockwise = TRUE
)