2

I rewrited code that solves GenomicRangeQuery task from Java to Swift. The code in Jave gets 100/100 score but the code in Swift fails all performance tests. I'm trying to understand why because logic in code is the same. I'w wondering why Swift code is executing so long. Do I using some very slow parts in my swift code that I'm not aware of. Please take a look at this Java code copied from here.

class Solution {
  public int[] solveGenomicRange(String S, int[] P, int[] Q) {
    //used jagged array to hold the prefix sums of each A, C and G genoms
    //we don't need to get prefix sums of T, you will see why.
    int[][] genoms = new int[3][S.length()+1];
    //if the char is found in the index i, then we set it to be 1 else they are 0
    // 3 short values are needed for this reason
    short a, c, g;
    for (int i=0; i<S.length(); i++) {
      a = 0; c = 0; g = 0;
      if ('A' == (S.charAt(i))) {
        a=1;
      }
      if ('C' == (S.charAt(i))) {
        c=1;
      }
      if ('G' == (S.charAt(i))) {
        g=1;
      }
      //here we calculate prefix sums. To learn what's prefix sums look at here https://codility.com/media/train/3-PrefixSums.pdf
      genoms[0][i+1] = genoms[0][i] + a;
      genoms[1][i+1] = genoms[1][i] + c;
      genoms[2][i+1] = genoms[2][i] + g;
    }

    int[] result = new int[P.length];
    //here we go through the provided P[] and Q[] arrays as intervals
    for (int i=0; i<P.length; i++) {
      int fromIndex = P[i];
      //we need to add 1 to Q[i],
      //because our genoms[0][0], genoms[1][0] and genoms[2][0]
      //have 0 values by default, look above genoms[0][i+1] = genoms[0][i] + a;
      int toIndex = Q[i]+1;
      if (genoms[0][toIndex] - genoms[0][fromIndex] > 0) {
        result[i] = 1;
      } else if (genoms[1][toIndex] - genoms[1][fromIndex] > 0) {
        result[i] = 2;
      } else if (genoms[2][toIndex] - genoms[2][fromIndex] > 0) {
        result[i] = 3;
      } else {
        result[i] = 4;
      }
    }
    return result;
  }
}

And here the same code rewritten to Swift 2.1

public func solution(inout S:String, inout _ P:[Int], inout _ Q:[Int]) -> [Int] {
  let len = S.characters.count

  //used jagged array to hold the prefix sums of each A, C and G genoms
  //we don't need to get prefix sums of T, you will see why.
  var genoms = [[Int]](count: 3, repeatedValue: [Int](count: len+1, repeatedValue: 0))

  //if the char is found in the index i, then we set it to be 1 else they are 0
  // 3 short values are needed for this reason
  var a,c,g:Int
  for i in 0..<len {
    a=0; c=0; g=0
    let char = S[S.startIndex.advancedBy(i)]
    switch char {
    case "A": a=1;
    case "C": c=1;
    case "G": g=1;
    default: ()
    }

    //here we calculate prefix sums. To learn what's prefix sums look at here https://codility.com/media/train/3-PrefixSums.pdf
    genoms[0][i+1] = genoms[0][i] + a
    genoms[1][i+1] = genoms[1][i] + c
    genoms[2][i+1] = genoms[2][i] + g

  }

  var result: [Int] = [Int](count: P.count, repeatedValue: 0)
  //here we go through the provided P[] and Q[] arrays as intervals
  for i in 0..<P.count {
    let fromIndex = P[i]
    //we need to add 1 to Q[i],
    //because our genoms[0][0], genoms[1][0] and genoms[2][0]
    //have 0 values by default, look above genoms[0][i+1] = genoms[0][i] + a;
    let toIndex = Q[i] + 1

    if (genoms[0][toIndex] - genoms[0][fromIndex] > 0) {
      result[i] = 1;
    } else if (genoms[1][toIndex] - genoms[1][fromIndex] > 0) {
      result[i] = 2;
    } else if (genoms[2][toIndex] - genoms[2][fromIndex] > 0) {
      result[i] = 3;
    } else {
      result[i] = 4;
    }
  }
  return result
}

Does anybody know why this Swift code fails all performance tests when Java code passes all tests? I suppose I'm touching some sensitive bottleneck in Swift but I'm not aware where.

If someone is not aware of codility this is the link to the task.

Community
  • 1
  • 1
Marcin Kapusta
  • 5,076
  • 3
  • 38
  • 55
  • 1
    `S.startIndex.advancedBy(i)` could be potentially slow. You might try a byte array instead of a string. Better though, use a Profiler to figure out where the cycles will be spend - Instruments is especially good at this. – CouchDeveloper Mar 10 '16 at 09:33
  • You were right. Thank You very much! It's a shame that accessing one character by index is so slow using normal String api. I created such byte array let seq = Array(S.utf8) and operate on it. – Marcin Kapusta Mar 10 '16 at 09:59
  • Also, consider this: For Swift: "Detected time complexity: O(N * M)", and for for Java: "Detected time complexity:O(N + M)", where `N` is the number of characters of the DNA sequence, and `M` is the number of queries. It is clear, that for Swift the algorithm is _inferior_ compared to the one for Java. Your task is now to find out _why_ ;) – CouchDeveloper Mar 10 '16 at 10:00
  • Here is the solution in Swift that gets 100% https://stackoverflow.com/questions/55160288/explanation-of-this-prefix-sum-coding-challenge-from-codility-genomicrangequery – AD Progress Mar 14 '19 at 14:36

3 Answers3

1

This Java code for the GenomicRangeQuery problem scored 100% at codility. It uses 4 simple Arrays to do the prefix sums. I post it here as an alternative approach. Time Complexity is O(n+m)

public int[] solution4(String S, int[] P, int[] Q){

    char[]chars=S.toCharArray();
    int n=chars.length;

    int[]contaA=new int[n+1];
    int[]contaC=new int[n+1];
    int[]contaG=new int[n+1];
    int[]contaT=new int[n+1];

    for (int i=1;i<n+1;i++){
        contaA[i]=contaA[i-1];
        contaC[i]=contaC[i-1];
        contaG[i]=contaG[i-1];
        contaT[i]=contaT[i-1];
        if (chars[i-1]=='A')contaA[i]+=1;
        if (chars[i-1]=='C')contaC[i]+=1;
        if (chars[i-1]=='G')contaG[i]+=1;
        if (chars[i-1]=='T')contaT[i]+=1;
    }

    int[] arrayContadores=new int[P.length];

    for (int i=0;i<P.length;i++){
        int primeiro=P[i];
        int ultimo=Q[i];

        int A=contaFatia(contaA,primeiro,ultimo);
        int C=contaFatia(contaC,primeiro,ultimo);
        int G=contaFatia(contaG,primeiro,ultimo);
        int T=contaFatia(contaT,primeiro,ultimo);

        if (A>0){arrayContadores[i]=1;
        }else if (C>0) {
            arrayContadores[i] = 2;
        }else if(G>0){
            arrayContadores[i]=3;
        }else if (T>0){
            arrayContadores[i]=4;
        }

    }
    return arrayContadores;
}


public int contaFatia(int[]P,int x,int y){
    return P[y+1]-P[x];
}
Jose Pinto
  • 11
  • 3
1
public func solution(_ S : inout String, _ P : inout [Int], _ Q : inout [Int]) -> [Int] {

   var retArr = [Int]()
   var chrArr = [Character]()


   for chr in S {
       chrArr.append(chr)
   }


   for i in 0..<P.count {

       var minFactor = 4

       if P[i] - Q[i] == 0 {
           if chrArr[P[i]] == "A"{
               minFactor = 1
           }else if chrArr[P[i]] == "C"{
               minFactor = 2
           }else if chrArr[P[i]] == "G"{
               minFactor = 3
           }
       }else {
           for j in P[i]...Q[i] {

               if chrArr[j] == "A"{
                   minFactor = 1
                       break
               }else if chrArr[j] == "C"{
                       minFactor = 2
               }else if chrArr[j] == "G"{
                       if minFactor > 2 {
                           minFactor = 3
                           }
                   }
               }
       }

       retArr.append(minFactor)
   }

   return retArr
}
stealthyninja
  • 10,343
  • 11
  • 51
  • 59
0

I have been playing with things in Swift for a while trying to come up with the right solution. This is the closest I have come.

public func solution(_ S : inout String, _ P : inout [Int], _ Q : inout [Int]) -> [Int] {
    let N = S.count + 1
    var outerImpacts: ContiguousArray<ContiguousArray<Int>> = []
    outerImpacts.reserveCapacity(N)
    for i in 0..<N {
        if i > 0 {
            var innerImpacts = outerImpacts[i - 1]
            switch S[S.index(S.startIndex, offsetBy: i - 1)] {
            case "A":
                innerImpacts[0] += 1
            case "C":
                innerImpacts[1] += 1
            case "G":
                innerImpacts[2] += 1
            case "T":
                innerImpacts[3] += 1
            default:
                break
            }
            outerImpacts.append(innerImpacts)
        } else {
            outerImpacts.append(ContiguousArray<Int>(repeating: 0, count: 4))
        }
    }

    let M: Int = P.count
    var minimalImpacts: [Int] = []
    minimalImpacts.reserveCapacity(M)
    for i in 0..<M {
        for j in 0..<4 where (outerImpacts[Q[i] + 1][j] - outerImpacts[P[i]][j]) > 0 {
            minimalImpacts.append(j + 1)
            break
        }
    }

    return minimalImpacts
}
Sethmr
  • 3,046
  • 1
  • 24
  • 42