I’m trying to create a plot in R using ggplot2 to represent the mean LOESS relationship between deficit and EVI for different land cover types.
I’m expecting the ribbon (representing mean ± standard deviation) to be symmetrical around the mean, but it appears asymmetric. Here’s what I’m doing:
First, I summarize my data:
# Collapse the fitted data to one row per land cover class, year and month.
# Each group gets NA-tolerant means of deficit, wevi and loess_fitted, plus
# the standard deviation of loess_fitted.
summarized_data <- all_fitted_data %>%
  group_by(Land_cover_ESA, year, month.x) %>%
  summarise(
    mean_deficit = mean(deficit, na.rm = TRUE),
    mean_wevi = mean(wevi, na.rm = TRUE),
    mean_loess_fitted = mean(loess_fitted, na.rm = TRUE),
    sd_loess_fitted = sd(loess_fitted, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Ribbon limits: one standard deviation either side of the group mean.
  mutate(
    lower_bound = mean_loess_fitted - sd_loess_fitted,
    upper_bound = mean_loess_fitted + sd_loess_fitted
  )
Then, I create a plot for one land cover type:
# Plot the mean LOESS fit against mean deficit for one land cover class
# ("Tree cover"), with a shaded band running from lower_bound to upper_bound.
tree <- summarized_data %>%
  filter(Land_cover_ESA == "Tree cover")

ggplot(tree, aes(x = mean_deficit, y = mean_loess_fitted)) +
  # data, x and y are inherited from ggplot(); the ribbon only needs its
  # vertical limits. The band is mean - sd to mean + sd, so it is symmetric
  # when measured vertically at each x; on steep stretches of the line it can
  # still look lopsided, because its apparent thickness is judged
  # perpendicular to the line rather than vertically.
  geom_ribbon(
    aes(ymin = lower_bound, ymax = upper_bound),
    fill = "grey", alpha = 0.3
  ) +
  geom_line(color = "blue", size = 0.3) +
  labs(
    title = "LOESS relationship between deficit and EVI by LC (ESA)",
    subtitle = "Shaded area represents confidence bounds",
    x = "Mean Deficit",
    y = "Mean wEVI"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 8, face = "bold"),
    strip.background = element_rect(fill = "lightgrey", color = "black"),
    axis.title = element_text(face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 12),
    plot.subtitle = element_text(hjust = 0.5, size = 10)
  ) +
  # Zoom with coord_cartesian() instead of scale_*_continuous(limits = ...).
  # Scale limits convert out-of-range values to NA before drawing, so any row
  # whose lower_bound or upper_bound leaves the window would be removed from
  # the ribbon and distort it. coord_cartesian() only changes the visible
  # window and keeps every row.
  coord_cartesian(xlim = c(0, 1000), ylim = c(0, 0.45))
By definition, adding and subtracting the same value (here, the standard deviation) from the mean should give an interval that is symmetrical around that mean. However, the resulting plot shows an asymmetric ribbon:
I don’t understand why this is happening. Here’s a sample of my data:
> dput(tree[c(1:50), 1:9])
structure(list(Land_cover_ESA = structure(c(5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), levels = c("No Data",
"Cropland, rainfed", "Cropland, irrigated", "Mosaic cropland / natural vegetation",
"Tree cover", "Mosaic tree and shrub / herbaceous cover", "Shrubland",
"Grassland", "Sparse vegetation", "Others", "Water bodies"), class = "factor"),
year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L), levels = c("2000",
"2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008",
"2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016",
"2017", "2018", "2019", "2020", "2021", "2022", "2023"), class = "factor"),
month.x = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 7, 8,
9, 10, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1), mean_deficit = c(82.4738494005972, 129.719510511283,
122.138110772945, 98.1388682065341, 196.748084933609, 358.585790797378,
518.712517495836, 626.193015876366, 669.779226063743, 639.303625951273,
312.301051501745, 7.77964141437868, 470.158355192669, 545.343790390942,
634.636604281387, 653.441189283013, 70.6323577331491, 138.125210402323,
212.834877984013, 358.719956626155, 506.499270794249, 570.133456752184,
581.157439544412, 584.437843544163, 281.295845272419, 21.1733288265801,
21.8933127246764, 33.6764009664231, 49.6029437749626, 102.756861458949,
225.5882262778, 381.722382074417, 549.485957312889, 628.697072420198,
630.287357612645, 569.37417191341, 300.535161512659, 35.4157519964775,
31.2320050591674, 39.7329497989952, 68.0565082588114, 87.4162979677824,
197.289795210899, 355.431571271699, 475.329969860966, 559.258913509294,
605.055712571694, 606.154900568934, 294.912178233039, 28.8375136546503
), mean_wevi = c(0.28000258814685, 0.263361762803314, 0.287030196990886,
0.355425144047255, 0.368079664166691, 0.342548067294819,
0.31889031133874, 0.289054334039985, 0.261696438503887, 0.243765207790926,
0.237859493518853, 0.195645245735521, 0.326300749965643,
0.319755887193707, 0.293930044687457, 0.292893726412389,
0.311127382207287, 0.331704295040896, 0.356646610694028,
0.341930311415441, 0.312507060090146, 0.299653413530774,
0.299505795328931, 0.28432770025717, 0.232493037456401, 0.213813988704767,
0.24219561440452, 0.272305743451518, 0.312835914084102, 0.368585621621942,
0.374483618583022, 0.342560362106118, 0.307508602097646,
0.292229092436128, 0.285979392523853, 0.268945849137492,
0.256662360559111, 0.233610767727536, 0.282395495099974,
0.302224390973131, 0.307733604813033, 0.361268428985046,
0.38123331447224, 0.350654242819773, 0.32485660608646, 0.308347496014839,
0.283198297249929, 0.261488012857099, 0.245937300896692,
0.241104671154802), mean_loess_fitted = c(0.291436181592393,
0.295348669436235, 0.295935430472096, 0.329926948287558,
0.344453319726963, 0.328160749178319, 0.30906615245252, 0.304001157389113,
0.297585252859528, 0.29418239114029, 0.31955516136069, 0.231239464024762,
0.314339625410141, 0.3100344041556, 0.304594044261034, 0.301446891430369,
0.32354072428349, 0.330911840072006, 0.347132513581626, 0.333057863538675,
0.314314997753093, 0.304781646840093, 0.301607955328855,
0.292571251307264, 0.326430645023785, 0.231733856019644,
0.257916626803131, 0.308756264555023, 0.317334933463467,
0.34279826320326, 0.347283676155667, 0.322787939723469, 0.306561864311482,
0.303463939159867, 0.302719200661782, 0.305489704026616,
0.324697559122468, 0.239974272773966, 0.296712401131459,
0.307112217025957, 0.306322626792751, 0.342684963061053,
0.352139113869946, 0.33031747548867, 0.31679099116593, 0.313032896195026,
0.306547695300594, 0.30281073714444, 0.324429704463515, 0.284845995066394
), sd_loess_fitted = c(0.0499010620359411, 0.0443954660857142,
0.0463486590155303, 0.0704782684398555, 0.0815158846197603,
0.0906198263527595, 0.0850584614476746, 0.0839888420300539,
0.0854734163976045, 0.0806612266455118, 0.0600646054884668,
0.0238718913978058, 0.0885158703675765, 0.0850907715785816,
0.0826486650776377, 0.0844573859814579, 0.0607229950220754,
0.0607457934131822, 0.08388338919698, 0.0950476171088816,
0.0884000043134731, 0.0788914586187457, 0.0812321816319821,
0.0742474128519292, 0.0696490691794528, 0.0204608489289175,
0.0515514207277261, 0.0734683024379082, 0.0752327799481906,
0.0785125497534724, 0.0860908474269935, 0.0889334847572743,
0.081282892614643, 0.0817077815108593, 0.0858112347282016,
0.0852425144242475, 0.074046710786605, 0.0307431721611555,
0.0591870586617848, 0.0554349270330144, 0.0531191520958542,
0.0797683150206312, 0.0838009764268977, 0.0845962623053035,
0.0857599222952444, 0.085996164950749, 0.0858879492287324,
0.0835705191951983, 0.0820454085482396, 0.0593733958549729
), lower_bound = c(0.241535119556452, 0.250953203350521,
0.249586771456565, 0.259448679847702, 0.262937435107202,
0.23754092282556, 0.224007691004846, 0.220012315359059, 0.212111836461924,
0.213521164494779, 0.259490555872223, 0.207367572626956,
0.225823755042564, 0.224943632577019, 0.221945379183396,
0.216989505448911, 0.262817729261414, 0.270166046658824,
0.263249124384646, 0.238010246429794, 0.22591499343962, 0.225890188221347,
0.220375773696873, 0.218323838455335, 0.256781575844332,
0.211273007090726, 0.206365206075405, 0.235287962117115,
0.242102153515277, 0.264285713449788, 0.261192828728673,
0.233854454966195, 0.225278971696839, 0.221756157649008,
0.21690796593358, 0.220247189602368, 0.250650848335863, 0.20923110061281,
0.237525342469675, 0.251677289992942, 0.253203474696896,
0.262916648040422, 0.268338137443048, 0.245721213183366,
0.231031068870685, 0.227036731244277, 0.220659746071862,
0.219240217949242, 0.242384295915275, 0.225472599211422),
upper_bound = c(0.341337243628335, 0.33974413552195, 0.342284089487626,
0.400405216727413, 0.425969204346723, 0.418780575531079,
0.394124613900195, 0.387989999419166, 0.383058669257133,
0.374843617785802, 0.379619766849156, 0.255111355422568,
0.402855495777717, 0.395125175734182, 0.387242709338672,
0.385904277411827, 0.384263719305565, 0.391657633485188,
0.431015902778606, 0.428105480647557, 0.402715002066566,
0.383673105458839, 0.382840136960837, 0.366818664159194,
0.396079714203237, 0.252194704948561, 0.309468047530857,
0.382224566992931, 0.392567713411658, 0.421310812956732,
0.43337452358266, 0.411721424480744, 0.387844756926125, 0.385171720670727,
0.388530435389983, 0.390732218450864, 0.398744269909073,
0.270717444935121, 0.355899459793244, 0.362547144058971,
0.359441778888605, 0.422453278081684, 0.435940090296843,
0.414913737793973, 0.402550913461174, 0.399029061145776,
0.392435644529327, 0.386381256339638, 0.406475113011754,
0.344219390921367)), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))
Can anyone explain why the ribbon is asymmetric and how to fix this?
2