1

I use the following R code to optimize Soccer lineups for my fantasy sports league. It has been working great up until now, but a new wrinkle has been added into the list of constraints that I would like to resolve.

A lineup consists of 8 players. 1GK, 2D, 2M, 2F, & 1 Util.

When creating the model Matrix, I now have to account for hybrid player positions such as M/F or D/M

In R what is the correct way to add a 1 in the column for M and a 1 in the column for F if a players position is M/F? Is this the correct approach to resolve this or should I be looking at other ideas.

Working Solver code with GK D M F positions accounted for but not D/M or M/F

df <- read.csv("players.csv",encoding = "UTF-8")
mm <- cbind(model.matrix(as.formula("FP~Pos+0"), df))
mm <- cbind(mm, mm, 1, df$Salary, df$Salary, df$FP)
colnames(mm) <- c("D", "F", "GK", "M", "D", "F", "GK", "M", "tot", "salary", "minSal", "FP")

mm <- t(mm)
obj <- df$FP
dir <- c("<=", "<=", "<=", "<=", ">=", ">=", ">=", ">=", "==", "<=", ">=", "<=")

x <- 20000
vals <- c()
ptm <- proc.time()
for(i in 1:5){
  rhs <- c(3, 3, 1, 3, 2, 2, 1, 2, 8, 50000, 49500, x)
  lp <- lp(direction = 'max',
           objective.in = obj,
           all.bin = T,
           const.rhs = rhs,
           const.dir = dir,
           const.mat = mm)
  vals <- c(vals, lp$objval)
  x <- lp$objval - 0.00001
  df$selected <- lp$solution
  lineup <- df[df$selected == 1, ]
  lineup = subset(lineup, select = -c(selected))
  lineup <- lineup %>%
    arrange(Pos)
  print("---- Start ----")
  print(i)
  print(lineup)
  print(sum(lineup$FP))
  print(mean(lineup$own, na.rm = TRUE))
  print(sum(lineup$Salary))
  print(sum(lineup$S))
  print("---- END ----")
}
proc.time() - ptm

Here is a sample pool of approx 100 players with a few hybrid players included.

structure(list(Name = structure(c(104L, 105L, 92L, 16L, 84L, 
53L, 85L, 37L, 21L, 34L, 100L, 101L, 83L, 31L, 14L, 35L, 98L, 
59L, 60L, 5L, 6L, 78L, 57L, 89L, 26L, 17L, 74L, 63L, 33L, 71L, 
75L, 41L, 9L, 39L, 12L, 1L, 29L, 7L, 2L, 68L, 73L, 90L, 46L, 
72L, 79L, 50L, 88L, 20L, 97L, 64L, 67L, 3L, 94L, 4L, 22L, 103L, 
52L, 47L, 30L, 58L, 10L, 44L, 28L, 38L, 23L, 15L, 49L, 69L, 81L, 
43L, 99L, 93L, 32L, 56L, 82L, 91L, 62L, 36L, 70L, 48L, 11L, 77L, 
27L, 51L, 25L, 24L, 65L, 96L, 42L, 18L, 102L, 86L, 76L, 87L, 
45L, 61L, 40L, 95L, 8L, 55L, 13L, 66L, 80L, 19L, 54L), .Label = c(" Bojan", 
" Oscar", " Willian", "Aaron Ramsey", "Abel Hernandez", "Adam Smith", 
"Adama Diomande", "Adlene Guedioura", "Adnan Januzaj", "Ahmed Elmohamady", 
"Alex Iwobi", "Alex Oxlade-Chamberlain", "Alexis Sanchez", "Andre Gray", 
"Andrew Robertson", "Andros Townsend", "Anthony Martial", "Antonio Valencia", 
"Ben Mee", "Branislav Ivanovic", "Calum Chambers", "Cedric Soares", 
"Cesc Fabregas", "Charlie Daniels", "Christian Fuchs", "Curtis Davies", 
"Daley Blind", "Daniel Drinkwater", "David de Gea", "Demarai Gray", 
"Diego Costa", "Donald Love", "Dusan Tadic", "Eden Hazard", "Eldin Jakupovic", 
"Erik Pieters", "Etienne Capoue", "Fernando Llorente", "Gareth Barry", 
"Glenn Whelan", "Gylfi Sigurdsson", "Hector Bellerin", "Idrissa Gueye", 
"Jack Cork", "Jack Rodwell", "Jason Puncheon", "Jefferson Montero", 
"Jeremain Lens", "Jeremy Pied", "Jermain Defoe", "Joe Allen", 
"Joel Ward", "John Obi Mikel", "Jordi Amat", "Jordon Ibe", "Joshua King", 
"Juan Mata", "Kasper Schmeichel", "Kevin Mirallas", "Kyle Naughton", 
"Laurent Koscielny", "Leighton Baines", "Leroy Fer", "Lukasz Fabianski", 
"Maarten Stekelenburg", "Marc Albrighton", "Mason Holgate", "Matt Targett", 
"Matthew Lowton", "Max Gradel", "Michy Batshuayi", "Modou Barrow", 
"Nacho Monreal", "Nathan Redmond", "Nordin Amrabat", "Pape Souare", 
"Papy Djilobodji", "Patrick van Aanholt", "Paul Pogba", "Phil Bardsley", 
"Pierre-Emile Højbjerg", "Ramiro Funes Mori", "Riyad Mahrez", 
"Robert Snodgrass", "Ross Barkley", "Ryan Fraser", "Sam Clucas", 
"Sam Vokes", "Santiago Cazorla", "Serge Gnabry", "Shane Long", 
"Shaun Maloney", "Simon Francis", "Stephen Kingsley", "Stephen Ward", 
"Steven Davis", "Steven Defour", "Theo Walcott", "Thibaut Courtois", 
"Tom Heaton", "Wayne Rooney", "Wayne Routledge", "Wilfried Zaha", 
"Xherdan Shaqiri", "Zlatan Ibrahimovic"), class = "factor"), 
    Salary = c(7000L, 9600L, 5700L, 7100L, 6500L, 3200L, 7800L, 
    4200L, 3300L, 8600L, 4200L, 7900L, 9900L, 8700L, 7700L, 4300L, 
    6700L, 5600L, 3700L, 6600L, 4700L, 5700L, 6600L, 7200L, 3500L, 
    7300L, 5900L, 4300L, 7700L, 7100L, 4000L, 9100L, 7400L, 4000L, 
    5800L, 5700L, 5600L, 6300L, 6800L, 4500L, 5100L, 3400L, 5700L, 
    5100L, 8000L, 7800L, 7000L, 5100L, 4900L, 4500L, 3300L, 8300L, 
    3200L, 6600L, 4900L, 6300L, 4400L, 4200L, 4800L, 5200L, 5200L, 
    4500L, 4300L, 7100L, 6500L, 4100L, 3000L, 3800L, 4700L, 4600L, 
    5800L, 4600L, 4200L, 6100L, 3500L, 6800L, 5800L, 4800L, 7300L, 
    5000L, 5000L, 3300L, 4200L, 3900L, 6100L, 5500L, 5400L, 4700L, 
    4700L, 4600L, 4400L, 3400L, 4300L, 4900L, 4600L, 4000L, 3500L, 
    3600L, 3300L, 4800L, 9300L, 7900L, 3700L, 3400L, 2800L), 
    Position = structure(c(5L, 3L, 2L, 5L, 5L, 5L, 5L, 5L, 1L, 
    6L, 4L, 3L, 6L, 3L, 3L, 4L, 6L, 6L, 1L, 3L, 1L, 1L, 5L, 5L, 
    1L, 6L, 6L, 5L, 5L, 3L, 6L, 5L, 5L, 5L, 6L, 6L, 4L, 3L, 5L, 
    1L, 2L, 5L, 5L, 6L, 5L, 3L, 3L, 2L, 5L, 4L, 1L, 5L, 1L, 5L, 
    1L, 6L, 1L, 6L, 6L, 4L, 1L, 5L, 5L, 3L, 5L, 1L, 1L, 1L, 5L, 
    5L, 4L, 1L, 1L, 3L, 1L, 3L, 2L, 1L, 6L, 3L, 6L, 1L, 1L, 5L, 
    1L, 2L, 4L, 5L, 1L, 1L, 5L, 5L, 1L, 5L, 5L, 1L, 5L, 1L, 5L, 
    6L, 6L, 5L, 1L, 1L, 1L), .Label = c("D", "D/M", "F", "GK", 
    "M", "M/F"), class = "factor"), FP = c(23.5, 21.75, 21, 19.75, 
    17.5, 17.333, 16.625, 16.5, 16.5, 16.25, 16, 15.25, 14.875, 
    14.25, 13.75, 13.5, 13.375, 13.25, 12.875, 12.75, 12.75, 
    12.5, 12.375, 12, 11.75, 11.625, 11.375, 11, 10.875, 10.625, 
    10.5, 10.375, 10.125, 10, 9.625, 9.625, 9.5, 9.25, 9.125, 
    9.125, 9, 9, 8.875, 8.875, 8.75, 8.75, 8.5, 8.5, 8.5, 8.5, 
    8.5, 8.25, 8.25, 8, 8, 7.875, 7.875, 7.875, 7.75, 7.5, 7.5, 
    7.5, 7.5, 7.25, 7.25, 7.125, 7, 6.875, 6.625, 6.625, 6.5, 
    6.5, 6.5, 6.25, 6.25, 6.125, 6.125, 6.125, 6, 6, 6, 6, 5.875, 
    5.875, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.625, 
    5.5, 5.5, 5.5, 5.5, 5.375, 5.375, 5.25, 5.125, 5, 5, 5, 5
    ), teamAbbrev = structure(c(11L, 9L, 7L, 5L, 7L, 4L, 6L, 
    14L, 1L, 4L, 3L, 9L, 8L, 4L, 3L, 7L, 1L, 6L, 13L, 7L, 2L, 
    12L, 9L, 1L, 7L, 9L, 10L, 13L, 10L, 4L, 14L, 13L, 12L, 6L, 
    1L, 11L, 9L, 7L, 4L, 10L, 1L, 1L, 5L, 13L, 9L, 12L, 3L, 4L, 
    3L, 13L, 6L, 4L, 13L, 1L, 10L, 5L, 5L, 13L, 8L, 8L, 7L, 13L, 
    8L, 13L, 4L, 7L, 10L, 3L, 10L, 6L, 4L, 2L, 12L, 2L, 6L, 10L, 
    6L, 11L, 2L, 12L, 1L, 12L, 9L, 11L, 8L, 2L, 6L, 10L, 1L, 
    9L, 13L, 2L, 5L, 7L, 12L, 1L, 11L, 3L, 14L, 2L, 1L, 8L, 11L, 
    3L, 13L), .Label = c("ARS", "BOU", "BUR", "CHE", "CRY", "EVE", 
    "HUL", "LEI", "MU", "SOU", "STK", "SUN", "SWA", "WAT"), class = "factor")), .Names = c("Name", 
"Salary", "Position", "FP", "teamAbbrev"), class = "data.frame", row.names = c(NA, 
-105L))
  • Are you getting some error? Or is this a question about modeling? I don't see a specific programming question here. If you have a question about how to model your data to account for Hybrid players, that sounds like a statistical question that should be asked at [stats.se], not Stack Overflow. – MrFlick Sep 14 '16 at 15:33
  • I am not getting an error. This is not a modelling question. The issue is when the program runs and Hybrid players are in the data. They are not included in the resulting lineups even though they would be the most optimal choice. I would like to modify the code to account for hybrid players. I tried adding players twice one for M and on for F as an example but i got results with that player included twice in some instances – sanga collins Sep 14 '16 at 15:37
  • What you are describing is a modeling problem. Your data doesn't fit the assumptions of a simple `lp` model. You have a more complicated scenario. You either need find a way to model your additional constraint in `lp` or transform your data to a form compatible with an `lp` model. Or possibly find a function other than `lp` that accommodate such an optimization model. But as i've already mentioned, I think there are better places to get such help. – MrFlick Sep 14 '16 at 16:02
  • Ahhhh I think I see what you mean now. When you say modelling I am thinking of regression model or linear model since you mentioned statistical questions on cross validated. (I am new to stats as you may have noticed). I am going to re word the question so it more accurately reflects what I am trying to accomplish – sanga collins Sep 14 '16 at 16:18

1 Answers1

1

By using an empty matrix and filling the rows with the correct values for each position I was able to get this to work.

#### SOLVER ##### ----
mm <- matrix(0, nrow = 8, ncol = nrow(df))
# Goal Keeper
j<-1
i<-1
for (i in 1:nrow(df)){
  if (df$Pos[i]=="GK")
    mm[j,i]<-1
}
# Defender
j<-2
i<-1
for (i in 1:nrow(df)){
  if (df$Pos[i]=="D")
    mm[j,i]<-1
}
# Midfielder
j<-3
i<-1
for (i in 1:nrow(df)){
  if (df$Pos[i]=="M"   ||
      df$Pos[i]=="M/F")
    mm[j,i]<-1
}
# Forward
j<-4
i<-1
for (i in 1:nrow(df)){
  if (df$Pos[i]=="F"   ||
      df$Pos[i]=="M/F")
    mm[j,i]<-1
}
# Utility
j<-5
i<-1
for (i in 1:nrow(df)){
  if (!df$Pos[i]=="GK")
    mm[j,i]<-1
}
# Salary
mm[6, ] <- df$Salary
mm[7, ] <- df$FP
mm[8, ] <- 1
# rbind existing matrix to itself to set minimum constraints
mm <- rbind(mm, mm[1:5,])
i<-1

objective.in <- df$FP
const.mat <- mm
const.dir <- c("<=", "<=", "<=", "<=", "<=", "<=", "<=", "==",
               ">=", ">=", ">=", ">=", ">=")

x <- 20000
vals <- c()

for(i in 1:5){
  const.rhs <- c(1, 4, 4, 4, 7, 50000, x, 8, # max for each contraint
                 1, 2, 2, 2, 7)              # min for each constraint
  sol <- lp(direction = "max", objective.in, # maximize objective function
            const.mat, const.dir, const.rhs, # constraints
            all.bin = TRUE)
  vals <- c(vals, sol$objval)
  x <- sol$objval - 0.00001
  inds <- which(sol$solution == 1)
  sum(df$salary[inds])
  solution<-df[inds, ]
  solution <- solution[,-c(8)]
  solution <- solution %>%
    arrange(Pos)
  print("---- Start ----")
  print(i)
  print(solution)
  print(sum(solution$FP))
  print(sum(solution$Salary))
  print(sum(solution$S))
  print("---- END ----")
}