I’m doing a full merge of 3 data sets
I have 3 data sets to merge, I’ve done a full merge as I want them all to combine with each other rather than bind rows which just stacks them.
dput(head(country))
structure(list(location = structure(1:6, levels = c("Afghanistan",
"Albania", "Algeria", "Andorra", "Angola", "Argentina"), class = "factor"), lockdown_date = structure(c(18345, 18329,
18345, 18337, 18345, 18341), class = "Date"), Type = c("Full",
"Full", "Full", "Full", "Full", "Full"), Reference = c("https://www.thestatesman.com/world/afghan-govt-imposes-lockdown-coronavirus-cases-increase-15-1502870945.html",
"https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Albania",
"https://www.garda.com/crisis24/news-alerts/325896/algeria-government-implements-lockdown-and-curfew-in-blida-and-algiers-march-23-update-7",
"https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Andorra",
"https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_Angola",
"https://www.bloomberg.com/news/articles/2020-03-20/argentina-orders-exceptional-lockdown-in-bid-to-contain-virus"
)), row.names = c(NA, 6L), class = "data.frame")
dput(head(covid))
structure(list(location = structure(c(1L, 1L, 1L, 1L, 1L, 1L), levels = c("Australia",
"China", "France", "Iran", "Italy", "Spain", "United Kingdom",
"United States"), class = "factor"), date = structure(c(18261,
18262, 18263, 18264, 18265, 18266), class = "Date"), total_cases = c(0L,
0L, 0L, 0L, 0L, 0L), new_cases = c(0L, 0L, 0L, 0L, 0L, 0L), total_deaths = c(0,
0, 0, 0, 0, 0), new_deaths = c(0, 0, 0, 0, 0, 0), gdp_per_capita = c(44648.71,
44648.71, 44648.71, 44648.71, 44648.71, 44648.71), population = c(25499881L,
25499881L, 25499881L, 25499881L, 25499881L, 25499881L)), row.names = c(NA,
6L), class = "data.frame")
dput(head(vaccination))
structure(list(location = structure(1:6, levels = c("Afghanistan",
"Albania", "Algeria", "Angola", "Argentina"), class = "factor"), Doses.administered.per.100.people = c(17L,
102L, 35L, 64L, 237L, 73L), total_doses_administered = c(6445359,
2906126, 15205854, 20397115, 106474858, 2150112), X..of.population.vaccinated = c(15,
46, 19, 41, 92, 38), perc_pop_vaccinated = c(13, 44, 16, 22,
84, 33)), row.names = c(NA, 6L), class = "data.frame")
MERGED DATA
> dput(head(df))
structure(list(location = structure(1:6, levels = c("Afghanistan",
"Albania", "Algeria", "Andorra", "Angola", "Argentina", "Armenia",
"Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh",
"Barbados", "Belgium", "Belize", "Bhutan", "Bolivia", "Bosnia and Herzegovina",
"Botswana", "Bulgaria", "Burkina Faso", "Cambodia", "Canada",
"Chile", "China", "Colombia", "Congo (Kinshasa)", "Costa Rica",
"Croatia", "Cyprus", "Czechia", "Denmark", "Dominican Republic",
"Ecuador", "Egypt", "El Salvador", "Estonia", "Ethiopia", "Fiji",
"Finland", "France", "Gambia", "Georgia", "Germany", "Ghana",
"Greece", "Honduras", "Hungary", "Iceland", "India", "Indonesia",
"Iran", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan",
"Jersey", "Jordan", "Kazakhstan", "Kenya", "Kosovo", "Kuwait",
"Kyrgyzstan", "Latvia", "Lebanon", "Liechtenstein", "Lithuania",
"Luxembourg", "Madagascar", "Malaysia", "Maldives", "Mali", "Malta",
"Mauritius", "Mexico", "Moldova", "Monaco", "Mongolia", "Montenegro",
"Morocco", "Namibia", "Nepal", "Netherlands", "New Zealand",
"Nigeria", "North Macedonia", "Norway", "Oman", "Pakistan", "Palestine",
"Panama", "Paraguay", "Peru", "Philippines", "Poland", "Portugal",
"Qatar", "Romania", "Russia", "Rwanda", "Saint Lucia", "San Marino",
"Saudi Arabia", "Senegal", "Serbia", "Singapore", "Slovakia",
"Slovenia", "Somalia", "South Africa", "South Korea", "Spain",
"Sri Lanka", "Switzerland", "Syria", "Taiwan", "Thailand", "Togo",
"Trinidad and Tobago", "Tunisia", "Turkey", "Uganda", "Ukraine",
"United Arab Emirates", "United Kingdom", "United States", "Uruguay",
"Uzbekistan", "Vatican City", "Venezuela", "Vietnam", "Zimbabwe",
"Aruba", "Belarus", "Benin", "Brazil", "Brunei", "Burundi", "Cameroon",
"Cape Verde", "Central African Republic", "Chad", "Comoros",
"Congo", "Cuba", "Curaçao", "Czech Republic", "Djibouti", "Equatorial Guinea",
"Eswatini", "French Polynesia", "Gabon", "Grenada", "Guatemala",
"Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Hong Kong", "Ivory Coast",
"Kiribati", "Laos", "Lesotho", "Liberia", "Libya", "Macau", "Malawi",
"Mauritania", "Mozambique", "Myanmar", "New Caledonia", "Nicaragua",
"Niger", "Papua New Guinea", "Republic of the Congo", "Saint Vincent and the Grenadines",
"Samoa", "São Tomé and Principe", "Sierra Leone", "Solomon Islands",
"South Sudan", "Sudan", "Suriname", "Sweden", "Tajikistan", "Tanzania",
"Timor-Leste", "Tonga", "Turkmenistan", "Vanuatu", "West Bank & Gaza",
"Yemen", "Zambia"), class = "factor"), date = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"),
total_cases = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), new_cases = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), total_deaths = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), new_deaths = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), gdp_per_capita = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), population = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), lockdown_date = structure(c(18345, 18329, 18345, 18337,
18345, 18341), class = "Date"), total_doses_administered = c(6445359,
2906126, 15205854, NA, 20397115, 106474858), perc_pop_vaccinated = c(13,
44, 16, NA, 22, 84)), row.names = c(NA, 6L), class = "data.frame")
Some dates in the date column are being repeated due to the lockdown_date column having multiple lockdown dates per country. For example china 31/12/19 is repeated 3 times in the df dataframe, whereas in the covid data frame it only appears once. How do i merge these dataframes better?
this is my merge
df <- merge(country, covid, by = "location", all = TRUE)
df <- merge(df, vaccination, by = "location", all = TRUE)
Grep is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.