I have some data which looks like the following:
# A tibble: 20 × 4
hour day month sales_perc
<dbl> <int> <dbl> <dbl>
1 13 1 5 0.114
2 14 1 5 1.38
3 15 1 5 -0.0340
4 16 1 5 0.114
5 9 2 5 -0.353
6 10 2 5 -0.264
7 11 2 5 -0.199
8 12 2 5 -0.293
9 13 2 5 0.0159
10 14 2 5 0.333
11 15 2 5 0.248
12 16 2 5 0.607
13 9 3 5 0.184
14 10 3 5 -0.286
15 11 3 5 -0.0284
16 12 3 5 0.0137
17 13 3 5 0.131
18 14 3 5 0.178
19 15 3 5 0.129
20 16 3 5 0.249
It is daily time-series data, recorded at the hourly level. I will focus on observations 5–12
for illustration.
For each day, if the value at the first hour is less than `-0.25`, I want to compute the probability that it will return to 0 during that day. For example, `sales_perc = -0.353` at the first hour (`hour = 9`), and at `hour = 13` it is `0.0159` – so I want to compute the probability that, on a given day, the performance improves after the first hour.
(We can use a different number than `-0.25` – it was just for illustration.)
- I am interested in knowing the probability that it crosses the 0% line in a given day.
- I would also like to compute the probability of it crossing the 0% line at each hour – i.e. at hour 10, at hour 11, hour 12, 13… etc.
Expected output:
- A `summarise()` for each day: the probability that the `sales_perc` will be at or greater than 0
- A `mutate()` for each hour: the probability that in that hour it will be at or greater than 0
Data:
data = structure(list(hour = c(9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16, 9, 10,
11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16), day = c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L, 26L,
26L, 26L, 26L, 26L, 26L, 26L, 29L, 29L, 29L, 29L, 29L, 29L, 29L,
29L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), month = c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5), sales_perc = c(0.279894262167614,
0.695848235111166, 0.962136526201199, 1.13901415020992, 0.64725548126262,
0.657945887109312, 0.761934380345195, 0.853288757580467, -0.188530424578193,
0.0346280371674388, -0.207768223004545, -0.0711798541774669,
-0.0961889921317405, -0.106769781266222, -0.219310902060366,
-0.165445066466574, -0.201950262535333, -0.849152770565262, -0.597195776354491,
-0.603927451772351, -0.627007481776391, -0.657780855115104, -0.123093493354872,
-0.0269267016713762, 0.240858117991226, 0.333495855680163, 0.11701608971234,
0.276548025353487, 0.196977084349096, -0.0224280838615327, 0.274987810823982,
0.241833252072162, -0.0893082493641711, -0.35917448113849, 0.0524200594093971,
0.0427126410002356, 0.568854718776079, 0.702622944454135, 0.578562137185218,
0.594094006639864, 0.213907902924709, -0.375311138767903, -0.163347853142508,
-0.71367454884879, -0.729231487243311, -0.897440883634103, -0.665059116365902,
-0.690339141257003, 0.0310842577662013, -0.705224098070832, -0.469178015658688,
-0.904357624385606, -1.62512385133954, -1.86311269986207, -1.9874497309269,
-2.04184718201776, -0.206732148332792, -0.471428430121574, -0.245308960701111,
-0.191103593578446, -0.299712157630788, 0.0623163892103619, -0.394670464998962,
-0.337299820964033, -0.0118729593351147, -0.484812506183842,
-0.139742873113458, -0.353335847708274, -0.26400936935506, -0.199495801655517,
-0.292792345713315, 0.0158802628183578, 0.333485519185344, 0.248129106536713,
0.607420052801874, 0.18408632473611, -0.285921312887997, -0.0283962947731198,
0.0137085560973686, 0.131210465503401, 0.178211229265814, 0.129252100346636,
0.248712374909422)), row.names = c(NA, -168L), class = c("tbl_df",
"tbl", "data.frame"))716335213218562, -1.08043929949541, -0.583753833976461, -0.744038785000504,
-0.959730879588409, -0.940140496685472, -0.0877158008053901,
0.352856744148954, 0.252182927315495, -0.165463896973802, -0.398708185479048,
-0.530281886687139, -0.426617758462578, -0.43259838124477, 0.0540681258385529,
-0.166209423874078, -0.452570238500509, -0.454572761679713, -0.414522298095599,
-0.891122814746579, -0.855077397520873, -0.855077397520873, -0.0943661406256329,
-0.370437296711245, -0.134521945147174, 0.0702726579127058, 0.67662530618801,
0.656547403927234, 0.315223065494116, 0.319238645946266, 0.339070945609029,
0.765901430081572, 0.590382352354545, 0.761912360133234, 0.919480623092729,
0.869617248738459, 0.851666433970916, 0.932445100424838, 0.0434756832598912,
-0.156117226251401, -0.450566171966084, -0.336936545264112, 0.0790466968361627,
-0.316186787344617, -0.110665375570608, -0.632373574689245, -0.304170335387825,
-0.262146802209238, 0.10605939326022, 0.410229728648034, 0.754422476586885,
0.554310413831742, 0.744416873449126, 1.70695589530136, 0.130131314326268,
0.315469852912174, 0.284908710911312, 0.469261406206868, 0.441657794077055,
0.437714420915645, 0.20899877755432, 0.248432509168341, -0.0627574034124325,
-0.00392233771327355, -0.0392233771327689, 0.00882525985487663,
0.053932143557567, -0.0682486762110132, 0.029417532849585, 0.0303981172779034,
0.189109943522828, 0.0924887340850445, -0.306983883346125, -0.56673947694669,
-0.40537615364937, -0.664147824546904, -1.20628923391778, -1.38929885668182,
-0.0219595943464053, 0.0848438872474669, -0.135750219595945,
-0.0988181745588127, 0.0139742873113458, 1.2816417791264, -0.133753892837183,
0.0139742873113458, -0.353335847708274, -0.26400936935506, -0.199495801655517,
-0.292792345713315, 0.0158802628183578, 0.333485519185344, 0.248129106536713,
0.607420052801874, 0.18408632473611, -0.285921312887997, -0.0283962947731198,
0.0137085560973686, 0.131210465503401, 0.178211229265814, 0.129252100346636,
0.248712374909422)), row.names = c(NA, -168L), class = c("tbl_df",
"tbl", "data.frame"))