I have a large dataset with 12 quadrats per survey and a slew of variables. Most are numeric, but there is one categorical variable (Grazing – either “Present” or “Absent”) that I wish to use. I’ve created a single row of data per survey by averaging the numeric variables across the 12 quadrats, giving one average value per survey rather than 12; however, I’m not sure how to carry the categorical variable over in the same way.
As Grazing is either Present or Absent, it is rather simple conceptually. If any of the 12 quadrats in a survey has Grazing recorded as “Present”, Grazing should be “Present” for that survey. Otherwise, it should be “Absent”.
Example data:
<code>CL.SGN.Data.Sample <- structure(list(Species = c("Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. marina", "Z. japonica",
"Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica",
"Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica",
"Z. japonica", "Z. marina", "Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. japonica", "Z. japonica",
"Z. japonica", "Z. japonica"), Survey_Date = structure(c(1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1357689600,
1357689600, 1357689600, 1357689600, 1357689600, 1357689600, 1357689600,
1357689600, 1357689600, 1357689600, 1357689600, 1357689600, 1357689600,
1357689600, 1357689600, 1357689600), tzone = "UTC", class = c("POSIXct",
"POSIXt")), Year = c("2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2012", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2013", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2013"), Quarter = c("4", "4", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1"), Site_Name = c("Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay"), Quad = c("Q1",
"Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9", "Q10", "Q11",
"Q12", "Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9",
"Q10", "Q11", "Q12", "Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7",
"Q8", "Q9", "Q10", "Q11", "Q12", "Q1", "Q2", "Q3", "Q4"), Cover = c(0.75,
0.75, 0.45, 0.65, 0.1, 0.05, 0.1, 0.05, 0.05, 0.01, 0.02, 0.01,
0, 0, 0.05, 0.05, 0, 0, 0, 0, 0, 0, 0, 0, 0.75, 0.65, 0.3, 0.75,
0.15, 0.15, 0.3, 0.1, 0.1, 0.1, 0.15, 0.05, 0, 0, 0.05, 0), Calculated_Density = c(65,
60, 26, 20, 3.75, 9, 9, 16, 8, 5, 5, 2, 0, 0, NA, NA, 0, 0, 0,
0, 0, 0, 0, 0, 9, 7.75, 3.75, 9.25, 1.5, 1, 2.5, 0.75, 6, 0.25,
1, 4, 0, 0, NA, 0), Average_Height = c(46.6666666666667, 39.9,
42, 53.5, 41.6666666666667, 40, 48.8333333333333, 54.6666666666667,
66, 30.5, 47.5, 50, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 22,
29.8333333333333, 23.2333333333333, 30, 34.1666666666667, 37.3333333333333,
39.5, 33.1666666666667, 43.6666666666667, 33.6666666666667, 45.8333333333333,
28.1666666666667, 0, 0, NA, 0), Grazing = c("Absent", "Absent",
"Absent", "Absent", "Absent", "Absent", "Absent", "Absent", "Absent",
"Absent", "Absent", "Absent", "Absent", "Absent", NA, NA, "Absent",
"Absent", "Absent", "Absent", "Absent", "Absent", "Absent", "Absent",
"Present", "Present", "Absent", "Present", "Present", "Absent",
"Present", "Present", "Absent", "Present", "Absent", "Absent",
"Absent", "Absent", NA, "Absent"), Year.Quarter = structure(c(2012.75,
2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75,
2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75,
2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75,
2012.75, 2012.75, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013,
2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013), class = "yearqtr")), row.names = c(NA,
-40L), class = c("tbl_df", "tbl", "data.frame"))
<code>CL.SGN.Data.Sample <- structure(list(Species = c("Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. marina", "Z. japonica",
"Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica",
"Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica",
"Z. japonica", "Z. marina", "Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. japonica", "Z. japonica",
"Z. japonica", "Z. japonica"), Survey_Date = structure(c(1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1357689600,
1357689600, 1357689600, 1357689600, 1357689600, 1357689600, 1357689600,
1357689600, 1357689600, 1357689600, 1357689600, 1357689600, 1357689600,
1357689600, 1357689600, 1357689600), tzone = "UTC", class = c("POSIXct",
"POSIXt")), Year = c("2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2012", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2013", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2013"), Quarter = c("4", "4", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1"), Site_Name = c("Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay"), Quad = c("Q1",
"Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9", "Q10", "Q11",
"Q12", "Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9",
"Q10", "Q11", "Q12", "Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7",
"Q8", "Q9", "Q10", "Q11", "Q12", "Q1", "Q2", "Q3", "Q4"), Cover = c(0.75,
0.75, 0.45, 0.65, 0.1, 0.05, 0.1, 0.05, 0.05, 0.01, 0.02, 0.01,
0, 0, 0.05, 0.05, 0, 0, 0, 0, 0, 0, 0, 0, 0.75, 0.65, 0.3, 0.75,
0.15, 0.15, 0.3, 0.1, 0.1, 0.1, 0.15, 0.05, 0, 0, 0.05, 0), Calculated_Density = c(65,
60, 26, 20, 3.75, 9, 9, 16, 8, 5, 5, 2, 0, 0, NA, NA, 0, 0, 0,
0, 0, 0, 0, 0, 9, 7.75, 3.75, 9.25, 1.5, 1, 2.5, 0.75, 6, 0.25,
1, 4, 0, 0, NA, 0), Average_Height = c(46.6666666666667, 39.9,
42, 53.5, 41.6666666666667, 40, 48.8333333333333, 54.6666666666667,
66, 30.5, 47.5, 50, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 22,
29.8333333333333, 23.2333333333333, 30, 34.1666666666667, 37.3333333333333,
39.5, 33.1666666666667, 43.6666666666667, 33.6666666666667, 45.8333333333333,
28.1666666666667, 0, 0, NA, 0), Grazing = c("Absent", "Absent",
"Absent", "Absent", "Absent", "Absent", "Absent", "Absent", "Absent",
"Absent", "Absent", "Absent", "Absent", "Absent", NA, NA, "Absent",
"Absent", "Absent", "Absent", "Absent", "Absent", "Absent", "Absent",
"Present", "Present", "Absent", "Present", "Present", "Absent",
"Present", "Present", "Absent", "Present", "Absent", "Absent",
"Absent", "Absent", NA, "Absent"), Year.Quarter = structure(c(2012.75,
2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75,
2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75,
2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75,
2012.75, 2012.75, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013,
2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013), class = "yearqtr")), row.names = c(NA,
-40L), class = c("tbl_df", "tbl", "data.frame"))
CL.SGN.Data.Sample
</code>
CL.SGN.Data.Sample <- structure(list(Species = c("Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. marina", "Z. japonica",
"Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica",
"Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica", "Z. japonica",
"Z. japonica", "Z. marina", "Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. marina", "Z. marina",
"Z. marina", "Z. marina", "Z. marina", "Z. japonica", "Z. japonica",
"Z. japonica", "Z. japonica"), Survey_Date = structure(c(1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1350518400,
1350518400, 1350518400, 1350518400, 1350518400, 1350518400, 1357689600,
1357689600, 1357689600, 1357689600, 1357689600, 1357689600, 1357689600,
1357689600, 1357689600, 1357689600, 1357689600, 1357689600, 1357689600,
1357689600, 1357689600, 1357689600), tzone = "UTC", class = c("POSIXct",
"POSIXt")), Year = c("2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2012", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2013", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2013"), Quarter = c("4", "4", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1"), Site_Name = c("Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay",
"Birch Bay", "Birch Bay", "Birch Bay", "Birch Bay"), Quad = c("Q1",
"Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9", "Q10", "Q11",
"Q12", "Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9",
"Q10", "Q11", "Q12", "Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7",
"Q8", "Q9", "Q10", "Q11", "Q12", "Q1", "Q2", "Q3", "Q4"), Cover = c(0.75,
0.75, 0.45, 0.65, 0.1, 0.05, 0.1, 0.05, 0.05, 0.01, 0.02, 0.01,
0, 0, 0.05, 0.05, 0, 0, 0, 0, 0, 0, 0, 0, 0.75, 0.65, 0.3, 0.75,
0.15, 0.15, 0.3, 0.1, 0.1, 0.1, 0.15, 0.05, 0, 0, 0.05, 0), Calculated_Density = c(65,
60, 26, 20, 3.75, 9, 9, 16, 8, 5, 5, 2, 0, 0, NA, NA, 0, 0, 0,
0, 0, 0, 0, 0, 9, 7.75, 3.75, 9.25, 1.5, 1, 2.5, 0.75, 6, 0.25,
1, 4, 0, 0, NA, 0), Average_Height = c(46.6666666666667, 39.9,
42, 53.5, 41.6666666666667, 40, 48.8333333333333, 54.6666666666667,
66, 30.5, 47.5, 50, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 22,
29.8333333333333, 23.2333333333333, 30, 34.1666666666667, 37.3333333333333,
39.5, 33.1666666666667, 43.6666666666667, 33.6666666666667, 45.8333333333333,
28.1666666666667, 0, 0, NA, 0), Grazing = c("Absent", "Absent",
"Absent", "Absent", "Absent", "Absent", "Absent", "Absent", "Absent",
"Absent", "Absent", "Absent", "Absent", "Absent", NA, NA, "Absent",
"Absent", "Absent", "Absent", "Absent", "Absent", "Absent", "Absent",
"Present", "Present", "Absent", "Present", "Present", "Absent",
"Present", "Present", "Absent", "Present", "Absent", "Absent",
"Absent", "Absent", NA, "Absent"), Year.Quarter = structure(c(2012.75,
2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75,
2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75,
2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75, 2012.75,
2012.75, 2012.75, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013,
2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013), class = "yearqtr")), row.names = c(NA,
-40L), class = c("tbl_df", "tbl", "data.frame"))
CL.SGN.Data.Sample
Example code for arriving at the mean values. (Note that in my actual code, Density, Height and Cover are handled separately to reduce data loss due to NA removal.)
<code>SGN.ZM.Survey_Means <- CL.SGN.DATA %>%
filter(str_detect(Species, "Z. marina")) %>%
filter(!is.na(Calculated_Density),
summarize(across(c(Cover, Average_Height, Calculated_Density), mean), .by = c(Site_Name, Survey_Date))
<code>SGN.ZM.Survey_Means <- CL.SGN.DATA %>%
filter(str_detect(Species, "Z. marina")) %>%
filter(!is.na(Calculated_Density),
!is.na(Average_Height),
!is.na(Cover)) %>%
summarize(across(c(Cover, Average_Height, Calculated_Density), mean), .by = c(Site_Name, Survey_Date))
SGN.ZM.Survey_Means
</code>
# Per-survey means of the numeric measurements for Z. marina:
# one output row per Site_Name / Survey_Date combination.
# NOTE(review): `CL.SGN.DATA` is not the object created by the sample data
# (that one is `CL.SGN.Data.Sample`) — presumably the full dataset; confirm.
SGN.ZM.Survey_Means <- CL.SGN.DATA %>%
  # fixed() matches the species name literally; as a plain regex the "."
  # in "Z. marina" would match any single character.
  filter(str_detect(Species, fixed("Z. marina"))) %>%
  # Drop quadrats missing any of the three measurements so that mean()
  # below never receives an NA.
  filter(!is.na(Calculated_Density),
         !is.na(Average_Height),
         !is.na(Cover)) %>%
  summarize(across(c(Cover, Average_Height, Calculated_Density), mean),
            .by = c(Site_Name, Survey_Date))
SGN.ZM.Survey_Means
This is a case where, if I knew what to search for, I could find the answer, but so far none of my search results have been helpful.
I thought about adding Grazing to the summary like so, but obviously it produces NAs, as you can’t take the mean of categorical data:
<code>SGN.ZM.Survey_Means <- CL.SGN.DATA %>%
filter(str_detect(Species, "Z. marina")) %>%
filter(!is.na(Calculated_Density),
summarize(across(c(Cover, Average_Height, Calculated_Density, Grazing), mean), .by = c(Site_Name, Survey_Date))
<code>SGN.ZM.Survey_Means <- CL.SGN.DATA %>%
filter(str_detect(Species, "Z. marina")) %>%
filter(!is.na(Calculated_Density),
!is.na(Average_Height),
!is.na(Cover)) %>%
summarize(across(c(Cover, Average_Height, Calculated_Density, Grazing), mean), .by = c(Site_Name, Survey_Date))
SGN.ZM.Survey_Means
</code>
# Attempt at per-survey summaries for Z. marina that also includes Grazing:
# one output row per Site_Name / Survey_Date combination.
# NOTE(review): `CL.SGN.DATA` is not the object created by the sample data
# (that one is `CL.SGN.Data.Sample`) — presumably the full dataset; confirm.
SGN.ZM.Survey_Means <- CL.SGN.DATA %>%
  # fixed() matches the species name literally; as a plain regex the "."
  # in "Z. marina" would match any single character.
  filter(str_detect(Species, fixed("Z. marina"))) %>%
  # Drop quadrats missing any of the three numeric measurements.
  filter(!is.na(Calculated_Density),
         !is.na(Average_Height),
         !is.na(Cover)) %>%
  # Grazing is a character column ("Present"/"Absent"), so mean() returns NA
  # (with a warning) for it in every survey — this is the problem being
  # asked about, and it is left in place deliberately.
  summarize(across(c(Cover, Average_Height, Calculated_Density, Grazing), mean),
            .by = c(Site_Name, Survey_Date))
SGN.ZM.Survey_Means