This code:
{-# LANGUAGE BangPatterns #-}
module Main where
import Data.Bits
import Data.Word
import Control.Monad
import System.CPUTime
import Data.List
-- The Damenproblem.
-- Wiki: https://de.wikipedia.org/wiki/Damenproblem
main :: IO ()
main = do
start <- getCPUTime
print $ dame 14
end <- getCPUTime
print $ "Needed " ++ (show ((fromIntegral (end - start)) / (10^12))) ++ " Seconds"
type BitState = (Word64, Word64, Word64)
dame :: Int -> Int
dame max = foldl' (+) 0 $ map fn row
where fn x = recur (max - 2) $ nextState (x, x, x)
recur !depth !state = foldl' (+) 0 $ flip map row $ getPossible depth (getStateVal state) state
getPossible depth !stateVal state bit
| (bit .&. stateVal) > 0 = 0
| depth == 0 = 1
| otherwise = recur (depth - 1) (nextState (addBitToState bit state))
row = take max $ iterate moveLeft 1
getStateVal :: BitState -> Word64
getStateVal (l, r, c) = l .|. r .|. c
addBitToState :: Word64 -> BitState -> BitState
addBitToState l (ol, or, oc) = (ol .|. l, or .|. l, oc .|. l)
nextState :: BitState -> BitState
nextState (l, r, c) = (moveLeft l, moveRight r, c)
moveRight :: Word64 -> Word64
moveRight x = shiftR x 1
moveLeft :: Word64 -> Word64
moveLeft x = shift x 1
needs about 60 seconds to execute. If i enable compiler optimisation with -O2
, it takes about 7 seconds. -O1
is faster and takes about 5 seconds.
Tested a java version of this code, with for-loops in place of mapped lists, it takes about 1s (!). Been trying my hardest to optimize yet none of the tips i found online helped more than half a second. Please help
Edit: Java version:
public class Queens{
static int getQueens(){
int res = 0;
for (int i = 0; i < N; i++) {
int pos = 1 << i;
res += run(pos << 1, pos >> 1, pos, N - 2);
}
return res;
}
static int run(long diagR, long diagL, long mid, int depth) {
long valid = mid | diagL | diagR;
int resBuffer = 0;
for (int i = 0; i < N; i++) {
int pos = 1 << i;
if ((valid & pos) > 0) {
continue;
}
if (depth == 0) {
resBuffer++;
continue;
}
long n_mid = mid | pos;
long n_diagL = (diagL >> 1) | (pos >> 1);
long n_diagR = (diagR << 1) | (pos << 1);
resBuffer += run(n_diagR, n_diagL, n_mid, depth - 1);
}
return resBuffer;
}
}
Edit: Running on windows with ghc 8.4.1 on an i5 650 with 3.2GHz.