0

So I am currently working with a CSV file in Rstudio and I am trying to add a new column called “Cumulative.Cost” that actively accumulates the numbers in the preceding column "Total.Cost" as the rows numbers increase.

Here's the dput of my maintenanceCost1.1 CSV data table:

structure(list(Unit.ID = c(164L, 164L, 164L, 164L, 164L, 164L, 
164L, 164L, 164L, 164L, 164L, 164L, 164L, 164L, 164L, 164L, 164L, 
164L, 164L, 164L, 164L, 164L, 164L, 164L, 164L, 164L, 164L, 164L, 
164L, 164L, 165L, 165L, 165L, 165L, 165L, 165L, 967L, 967L, 967L, 
967L, 967L, 967L, 967L, 967L, 967L, 967L, 967L, 967L, 967L, 967L, 
967L, 967L, 967L, 967L, 967L, 967L, 967L, 967L, 967L, 967L, 967L, 
967L, 967L, 967L, 967L, 1054L, 1054L, 1054L, 1054L, 1054L, 1054L, 
1054L, 1054L, 1054L, 1054L, 1054L, 1054L, 1054L, 1054L, 1054L, 
1054L, 1054L, 1054L, 1054L, 1054L, 1054L, 1054L, 1160L, 1160L, 
1160L, 1160L, 1160L, 1160L, 1160L, 1160L, 1160L, 1160L, 925L, 
925L, 925L, 925L), Hour.Meter.Reading = c(2321L, 2524L, 2704L, 
2777L, 2960L, 3175L, 3371L, 3386L, 3570L, 3740L, 3854L, 3976L, 
4138L, 4216L, 4305L, 4483L, 4489L, 4721L, 4947L, 4968L, 5316L, 
5533L, 5712L, 5951L, 6165L, 6194L, 6439L, 6636L, 6702L, 6918L, 
4138L, 4326L, 4489L, 4652L, 4805L, 4943L, 381L, 600L, 783L, 1038L, 
1200L, 1388L, 1579L, 1755L, 1936L, 2136L, 2342L, 2382L, 2576L, 
2873L, 3080L, 3264L, 3485L, 3690L, 3990L, 4253L, 4353L, 4581L, 
4580L, 4784L, 4828L, 4954L, 5298L, 5407L, 5628L, 532L, 732L, 
947L, 1282L, 1481L, 1704L, 1728L, 1926L, 2063L, 2293L, 2505L, 
2811L, 2997L, 3170L, 3425L, 3746L, 3991L, 4078L, 4249L, 4500L, 
5747L, 4742L, 796L, 1016L, 1290L, 1528L, 1671L, 1906L, 2031L, 
2205L, 2425L, 2731L, 34L, 3863L, 4103L, 4106L), Labour.Cost = c(315, 
805, 577.5, 6440, 4910, 4140, 525, 1995, 2292.5, 2012.5, 945, 
3307.5, 1872.5, 1767.5, 1645, 1452.5, 11777.5, 2817.5, 4112.5, 
3622.5, 3675, 3150, 2436, 1974, 3843, 14532, 2373, 2919, 7098, 
2205, 1435, 1067.5, 5670, 5350, 3865, 910, 4270, 2730, 3097.5, 
2695, 1837.5, 2572.5, 840, 560, 2747.5, 2030, 1890, 2688, 1701, 
588, 1911, 9219, 1491, 4389, 714, 3108, 3087, 4158, 2916, 1596, 
714, 1197, 714, 4767, 2142, 542.5, 682.5, 871.5, 1092, 1092, 
693, 693, 7959, 1764, 7434, 3024, 1281, 1470, 3738, 1239, 1134, 
756, 1302, 2037, 4872, 210, 84, 336, 525, 1995, 924, 1575, 2100, 
1092, 5355, 504, 777, 1102.5, 490, 297.5, 875), Parts.Cost = c(12.62, 
243.5735, 405.959, 4647.767, 9961.5883, 5416.7948, 253.3975, 
1390.5058, 5128.0276, 5760.523, 628.643, 14481.7111, 2866.2141, 
2935.825, 2644.1532, 523.9236, 18247.2776, 10599.0455, 1968.7669, 
8023.1254, 14873.8008, 6442.1719, 4378.1336, 12144.6891, 13094.8609, 
20582.1682, 2544.103, 16934.6748, 17344.5551, 8912.7088, 4229.31, 
418.679, 8115.265, 6064.3123, 3587.7566, 472.1537, 6451.9113, 
6729.7326, 9224.9012, 2009.3578, 4555.0977, 6249.232, 3415.2062, 
2554.7489, 3348.7162, 4404.6421, 11405.5704, 2554.2787, 6104.7416, 
489.477, 2381.7686, 16538.1428, 7759.8042, 8556.2625, 3796.3243, 
11683.7093, 11665.8066, 31786.6422, 13727.0801, 1228.3541, 3666.517, 
1417.8506, 5831.6884, 10193.2736, 7525.4562, 2235.8885, 8357.0427, 
6890.5315, 3458.6842, 4430.8058, 2151.4558, 2233.2055, 2563.289, 
1940.2806, 10553.4599, 12625.7525, 12842.3638, 1706.0572, 15177.0692, 
8699.7549, 1923.699, 11671.4333, 7140.2504, 7452.8686, 25011.6846, 
1691.4286, 119.4423, 1957.0181, 3156.4815, 1546.2183, 3835.5721, 
4868.2379, 7433.8141, 2173.6179, 5626.8595, 1907.3543, 3609.1684, 
657.6733, 158.4313, 1326.837, 441.0999), Total.Cost = c(327.62, 
1048.5735, 983.459, 11087.767, 14871.5883, 9556.7948, 778.3975, 
3385.5058, 7420.5276, 7773.023, 1573.643, 17789.2111, 4738.7141, 
4703.325, 4289.1532, 1976.4236, 30024.7776, 13416.5455, 6081.2669, 
11645.6254, 18548.8008, 9592.1719, 6814.1336, 14118.6891, 16937.8609, 
35114.1682, 4917.103, 19853.6748, 24442.5551, 11117.7088, 5664.31, 
1486.179, 13785.265, 11414.3123, 7452.7566, 1382.1537, 10721.9113, 
9459.7326, 12322.4012, 4704.3578, 6392.5977, 8821.732, 4255.2062, 
3114.7489, 6096.2162, 6434.6421, 13295.5704, 5242.2787, 7805.7416, 
1077.477, 4292.7686, 25757.1428, 9250.8042, 12945.2625, 4510.3243, 
14791.7093, 14752.8066, 35944.6422, 16643.0801, 2824.3541, 4380.517, 
2614.8506, 6545.6884, 14960.2736, 9667.4562, 2778.3885, 9039.5427, 
7762.0315, 4550.6842, 5522.8058, 2844.4558, 2926.2055, 10522.289, 
3704.2806, 17987.4599, 15649.7525, 14123.3638, 3176.0572, 18915.0692, 
9938.7549, 3057.699, 12427.4333, 8442.2504, 9489.8686, 29883.6846, 
1901.4286, 203.4423, 2293.0181, 3681.4815, 3541.2183, 4759.5721, 
6443.2379, 9533.8141, 3265.6179, 10981.8595, 2411.3543, 4386.1684, 
1760.1733, 648.4313, 1624.337, 1316.0999)), row.names = c(17L, 
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 
31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 
44L, 45L, 46L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 
73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 
86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 
99L, 100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 
110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 
121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 
132L, 133L), class = "data.frame")

Here's my code so far:

library(dplyr)
library(tidyr)
library(tidyverse)
library(gdata)
library(ggplot2)

#Setting the working directory and reading the file
setwd("C:/Users/ahmed/Documents/Assignment 5")

#Step 1

maintenanceCost <- read.csv(file="C:\\Users\\ahmed\\Documents\\Assignment 5\\maintenance_cost.csv", header=TRUE, sep=",")
view(maintenanceCost)

maintenanceCost1.1 <- maintenanceCost %>%
  subset(Total.Cost > 0)
view(maintenanceCost1.1)

I am trying to find a function to add a new column to the overall maintenanceCost1.1 table to accumalate the "Total.Cost" column into a new column. I am thinking there's a way to use "cumsum" and "mutatate" function together to achieve this but not sure how exactly.

Amsi
  • 11
  • 2

2 Answers2

0

Are you looking for:

library(dplyr)

maintenanceCost1.1 %>%
  mutate(Cumulative.Cost = cumsum(Total.Cost))

Unit.ID Hour.Meter.Reading Labour.Cost Parts.Cost Total.Cost Cumulative.Cost
1       164               2321       315.0      12.62      327.6           327.6
2       164               2524       805.0     243.57     1048.6          1376.2
3       164               2704       577.5     405.96      983.5          2359.7
4       164               2777      6440.0    4647.77    11087.8         13447.4
5       164               2960      4910.0    9961.59    14871.6         28319.0
6       164               3175      4140.0    5416.79     9556.8         37875.8
7       164               3371       525.0     253.40      778.4         38654.2
8       164               3386      1995.0    1390.51     3385.5         42039.7
9       164               3570      2292.5    5128.03     7420.5         49460.2
10      164               3740      2012.5    5760.52     7773.0         57233.3
Ben
  • 28,684
  • 5
  • 23
  • 45
  • Yes that's exactly what I was looking for I just added it to my existing code maintenanceCost1.1 <- maintenanceCost %>% subset(Total.Cost > 0) %>% mutate(Cumulative.Cost = cumsum(Total.Cost)) view(maintenanceCost1.1) – Amsi Nov 22 '19 at 23:48
  • Oh - my mistake - thought you wanted to group by `Unit.ID` - will revise. – Ben Nov 22 '19 at 23:49
0

If there is no need to group by a particular variable, you can go simple and do :

MaintenanceCost1.1$Cumulative.cost <- cumsum(MaintenanceCost1.1$Total.Cost)
dc37
  • 15,840
  • 4
  • 15
  • 32