0

I would like to fit a random forest for multiple species using ranger.

I can do this separately for each individual species, but when I select multiple species at once I get the following error:

# Mean of each column in 14:124, computed before those columns are turned
# into factors. lapply() returns a list with one element per column.
# (Presumably these are the per-species detection columns -- confirm.)
detection_freq <- lapply(ebird_split$train[, 14:124], FUN=mean)

# Convert the same columns to factors, assigning the result back in place.
ebird_split$train[, 14:124] <- lapply(ebird_split$train[, 14:124], FUN=factor)

# grow random forest
# NOTE(review): this is the call that produces the error quoted below.
# Things to check, in order:
#   * The left-hand side of the formula is a multi-column subset
#     (columns 14:124) rather than a single response column; presumably
#     ranger expects one response per model, i.e. one forest per species
#     -- confirm against the ranger documentation.
#   * The error text mentions illegal column names, and the sample data
#     below contains names with spaces (e.g. `Blue Tit`); these are a
#     likely trigger -- TODO confirm.
#   * detection_freq is a list (it comes from lapply()), so
#     c(detection_freq, detection_freq) is a concatenation of two lists;
#     verify that this is the form sample.fraction expects.
#   * The trailing comma before the closing parenthesis leaves an empty
#     final argument in the call.

rf <- ranger(formula =  ebird_split$train[, 14:124] ~ ., 
             data = ebird_split$train,
             importance = "impurity",
             probability = TRUE,
             replace = TRUE, 
             sample.fraction = c(detection_freq, detection_freq),)

Error in parse.formula(formula, data, env = parent.frame()) : Error: Illegal column names in formula interface. Fix column names or use alternative interface in ranger.

Here's a reproducible example using only 20 columns and 15 rows.

structure(list(year = c(2014, 2014, 2015, 2014, 2015, 2014, 2014, 
2015, 2014, 2014, 2015, 2014, 2014, 2015, 2014), pland_00_water = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), pland_01_evergreen_needleleaf = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), pland_04_deciduous_broadleaf = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), pland_05_mixed_forest = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), pland_09_savanna = c(0.125, 
0, 0, 0, 0, 0.2, 0, 0, 0.111111111111111, 0.25, 0, 0, 0.1, 0.1, 
0), pland_10_grassland = c(0, 0, 0, 0, 0, 0.1, 0, 0, 0, 0, 0, 
0.6, 0, 0, 0), pland_11_wetland = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0), pland_12_cropland = c(0, 0.333333333333333, 
0.333333333333333, 1, 1, 0.7, 1, 1, 0, 0, 1, 0.3, 0.9, 0.9, 0.7
), pland_13_urban = c(0.875, 0, 0, 0, 0, 0, 0, 0, 0.888888888888889, 
0.75, 0, 0, 0, 0, 0), pland_14_mosiac = c(0, 0.666666666666667, 
0.666666666666667, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0, 0, 0.3), elevation_median = c(31.9876055132011, 
60.187326902531, 60.187326902531, 63.4307961294557, 63.4307961294557, 
55.2689008500971, 53.0951598592064, 53.0951598592064, 22.1066004189197, 
24.352084994479, 51.3272817386215, 62.2362454373989, 4.56623070514396, 
4.56623070514396, 32.65840408727), elevation_sd = c(2.05966198806878, 
4.73491604153919, 4.73491604153919, 3.81928752524468, 3.81928752524468, 
10.3834519297609, 4.28755305909316, 4.28755305909316, 5.21662903838999, 
8.104243228417, 1.76646574114901, 3.11995484557375, 0.12543304763668, 
0.12543304763668, 3.89986664218466), Blackbird = structure(c(2L, 
2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", 
"1"), class = "factor"), `Blue Tit` = structure(c(2L, 2L, 2L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", 
"1"), class = "factor"), Bullfinch = structure(c(1L, 1L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0", 
"1"), class = "factor"), `Carrion Crow` = structure(c(2L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("0", 
"1"), class = "factor"), Chaffinch = structure(c(2L, 2L, 2L, 
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L), .Label = c("0", 
"1"), class = "factor"), `Coal Tit` = structure(c(2L, 2L, 2L, 
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L), .Label = c("0", 
"1"), class = "factor"), `Collared Dove` = structure(c(2L, 2L, 
2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L), .Label = c("0", 
"1"), class = "factor")), row.names = c(NA, -15L), class = c("tbl_df", 
"tbl", "data.frame"))
desertnaut
  • 57,590
  • 26
  • 140
  • 166
Lime
  • 738
  • 5
  • 17

1 Answer

0

What do 1L and 2L mean?

What is the target feature?

 year = [2014, 2014, 2015, 2014, 2015, 2014, 2014, 2015, 2014, 2014, 2015, 2014, 2014, 2015, 2014] 
 pland_00_water = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 
 pland_01_evergreen_needleleaf = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 
 pland_04_deciduous_broadleaf = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 
 pland_05_mixed_forest = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 
 pland_09_savanna = [0.125, 0, 0, 0, 0, 0.2, 0, 0, 0.111111111111111, 0.25, 0, 0, 0.1, 0.1, 0] 
 pland_10_grassland = [0, 0, 0, 0, 0, 0.1, 0, 0, 0, 0, 0, 0.6, 0, 0, 0] 
 pland_11_wetland = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 
 pland_12_cropland = [0, 0.333333333333333, 0.333333333333333, 1, 1, 0.7, 1, 1, 0, 0, 1, 0.3, 0.9, 0.9, 0.7] 
 pland_13_urban = [0.875, 0, 0, 0, 0, 0, 0, 0, 0.888888888888889, 0.75, 0, 0, 0, 0, 0] 
 pland_14_mosiac = [0, 0.666666666666667, 0.666666666666667, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0, 0, 0.3] 
 elevation_median = [31.9876055132011, 60.187326902531, 60.187326902531, 63.4307961294557, 63.4307961294557, 55.2689008500971, 53.0951598592064, 53.0951598592064, 22.1066004189197, 24.352084994479, 51.3272817386215, 62.2362454373989, 4.56623070514396, 4.56623070514396, 32.65840408727] 
 elevation_sd = [2.05966198806878, 4.73491604153919, 4.73491604153919, 3.81928752524468, 3.81928752524468, 10.3834519297609, 4.28755305909316, 4.28755305909316, 5.21662903838999, 8.104243228417, 1.76646574114901, 3.11995484557375, 0.12543304763668, 0.12543304763668, 3.89986664218466] 
 Blackbird = ['2L', '2L', '2L', '1L', '1L', '2L', '2L', '2L', '1L', '2L', '2L', '2L', '2L', '2L', '2L'] 
 BlueTit = ['2L', '2L', '2L', '1L', '1L', '2L', '2L', '2L', '2L', '2L', '2L', '2L', '2L', '2L', '2L'] 
 Bullfinch = ['1L', '1L', '1L', '1L', '2L', '1L', '1L', '1L', '1L', '1L', '1L', '1L', '1L', '1L', '1L']
 CarrionCrow = ['2L', '1L', '1L', '1L', '1L', '1L', '1L', '1L', '1L', '1L', '1L', '2L', '1L', '1L', '1L'] 
 Chaffinch = ['2L', '2L', '2L', '1L', '1L', '1L', '2L', '2L', '1L', '1L', '1L', '2L', '2L', '2L', '1L']
 CoalTit = ['2L', '2L', '2L', '1L', '1L', '1L', '2L', '2L', '1L', '1L', '2L', '2L', '2L', '2L', '1L']
 CollaredDove = ['2L', '2L', '2L', '1L', '2L', '1L', '2L', '2L', '1L', '2L', '1L', '2L', '2L', '2L','1L']
Golden Lion
  • 3,840
  • 2
  • 26
  • 35
  • https://github.com/topics/bird-species-classification lists some classifier approaches. In https://github.com/zahan97/Bird-Species-Classification/tree/master/data (features -> label, where the label is the target species), the designers predicted bird species using an MLPClassifier or a neural network. – Golden Lion Mar 17 '21 at 09:17
  • (https://github.com/zahan97/Bird-Species-Classification/blob/master/nn.py) neural networks. I would use pytorch. – Golden Lion Mar 17 '21 at 09:24