I have downloaded a data set from an agency website, and the datetime column (labelled ‘date’ in the df) in two of the files is not converting from chr to datetime.
For the other files the datetime is in numeric format (for example “01/09/2023 00:01:00”) rather than “01-oct-2023 00:01:00”, and they convert using dmy_hm. It may be an issue with the month abbreviation being “oct” instead of “Oct”, but I’m really not sure. I’ve tried various methods, but none are working for me, and I’d be grateful for any help.
# Reproducible sample of the problem data: the tribble() call below is
# presumably the output that dpasta() produced for subset_Rain_10.23.
library(datapasta)
dpasta(subset_Rain_10.23)
# NOTE(review): the tribble is not assigned to a name here; to recreate the
# data frame it would need `subset_Rain_10.23 <- ` in front of it.
# The `date` column is character text such as "01-oct-2023 00:00:00":
# day, lowercase abbreviated month and year joined by dashes, then a space
# and HH:MM:SS.
tibble::tribble(
~stno, ~year, ~month, ~day, ~hour, ~minute, ~date, ~ispeed, ~speed, ~idir, ~dir, ~imaxgust, ~maxgust, ~idirmgust, ~dirmgust, ~itimemgust, ~timemgust, ~iminspeed, ~minspeed, ~ispeedstdev, ~speedstdev, ~idrybulb, ~drybulb, ~igrasstemp, ~grasstemp, ~it5cm, ~t5cm, ~it10cm, ~t10cm, ~it20cm, ~t20cm, ~it30cm, ~t30cm, ~it50cm, ~t50cm, ~it100cm, ~t100cm, ~irelhum, ~relhum, ~icbl, ~cbl, ~isoltot, ~soltot, ~irain, ~rain,
1275, 2023, 10, 1, 0, 0, "01-oct-2023 00:00:00", 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 0, 15.19, 0, 14.25, 0, 13.74, 0, 13.52, 0, 13.24, 0, 13.19, 0, 13.29, 0, 14.17, 0, 91.5, 0, 1006.34, 0, 0, 0, 0,
1275, 2023, 10, 1, 0, 1, "01-oct-2023 00:01:00", 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 0, 15.19, 0, 14.25, 0, 13.75, 0, 13.51, 0, 13.24, 0, 13.2, 0, 13.29, 0, 14.17, 0, 92, 0, 1006.335, 0, 0, 0, 0,
1275, 2023, 10, 1, 0, 2, "01-oct-2023 00:02:00", 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 0, 15.19, 0, 14.24, 0, 13.75, 0, 13.51, 0, 13.24, 0, 13.19, 0, 13.29, 0, 14.17, 0, 91.7, 0, 1006.32, 0, 0, 0, 0,
1275, 2023, 10, 1, 0, 3, "01-oct-2023 00:03:00", 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 0, 15.19, 0, 14.23, 0, 13.75, 0, 13.52, 0, 13.24, 0, 13.2, 0, 13.29, 0, 14.17, 0, 91.6, 0, 1006.33, 0, 0, 0, 0,
1275, 2023, 10, 1, 0, 4, "01-oct-2023 00:04:00", 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 0, 15.19, 0, 14.23, 0, 13.75, 0, 13.51, 0, 13.25, 0, 13.2, 0, 13.29, 0, 14.17, 0, 91.7, 0, 1006.328, 0, 0, 0, 0,
1275, 2023, 10, 1, 0, 5, "01-oct-2023 00:05:00", 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 4, NA, 0, 15.2, 0, 14.21, 0, 13.75, 0, 13.52, 0, 13.25, 0, 13.2, 0, 13.29, 0, 14.16, 0, 91.6, 0, 1006.315, 0, 0, 0, 0
)
# The five attempts below are kept exactly as tried; each one fails, and the
# comment above it explains why.
str(subset_Rain_10.23) # date is chr
# Convert from chr to datetime
library(lubridate)
library(dplyr)
library(parsedate)
# Attempt 1: the format has no dashes and puts ":" between the date and the
# time, so it does not match text like "01-oct-2023 00:00:00" and every value
# parses to NA.
# NOTE(review): the chained assignment also writes that NA result back into
# subset_Rain_10.23$date, so the original text is lost for later attempts.
df1<-subset_Rain_10.23$date<-as.POSIXct(subset_Rain_10.23$date, format="%d%b%Y:%H:%M:%S") # returns format NA
str(df1)
# Attempt 2: dmy_hms() is handed the whole data frame (45 columns) rather than
# the `date` column, and the new column is named after the data frame.
df2<-subset_Rain_10.23%>%mutate(subset_Rain_10.23=lubridate::dmy_hms(subset_Rain_10.23)) # returns error in mutate, must be size 6 or 1, not 45
# Attempt 3: the format is not wrapped in quotes, so R tries to read `%d%` as
# an infix operator and stops with a syntax error; the format also omits the
# dashes present in the data.
df3<-as.POSIXct(strptime(subset_Rain_10.23$date, format=%d%b%Y %H:%M:%S, tz= "UTC")) # error unexpected special
# Attempt 4: the second argument is a sample date rather than a format/order
# specification, and the whole data frame is passed instead of the `date`
# column. NOTE(review): presumably lubridate::parse_date_time(), whose second
# argument is `orders` (e.g. "dmy HMS") - confirm.
df4<- parse_date_time(subset_Rain_10.23, "01-oct-2023 00:00:00") # unknown formats supplied: oct
# Attempt 5: same problems as attempt 4, plus an unquoted format string, which
# is a syntax error as in attempt 3.
df5<-parse_date_time(subset_Rain_10.23, "01-oct-2023 00:00:00", %d%b%Y %H:%M:%S, tz= "UTC") # error unexpected special
^
1
Your format string must match the text in the column exactly, including the dashes between day, month and year and the space before the time. You need:
# Parse the character `date` column (e.g. "01-oct-2023 00:00:00") to POSIXct.
# The format mirrors the text exactly: dashes between day, abbreviated month
# and year, then a space before HH:MM:SS. Month names are matched
# case-insensitively on input, so the lowercase "oct" is fine (this assumes a
# locale whose month abbreviations are English).
# `tz = "UTC"` pins the time zone, as the earlier attempts in this script
# request; without it the result depends on the session's local zone.
as.POSIXct(subset_Rain_10.23$date, format = "%d-%b-%Y %H:%M:%S", tz = "UTC")
# [1] "2023-10-01 00:00:00 UTC" "2023-10-01 00:01:00 UTC"
# [3] "2023-10-01 00:02:00 UTC" "2023-10-01 00:03:00 UTC"
# [5] "2023-10-01 00:04:00 UTC" "2023-10-01 00:05:00 UTC"