3

In Swift 3.0's Scanner (formerly NSScanner), the string property returns the string being parsed, and scanLocation returns the current scan location. I'm trying to extract the text parsed so far:

/// The portion of `string` that precedes the current scan position.
///
/// NOTE: `scanLocation` is passed to `index(_:offsetBy:)` as if it were a character
/// offset, but it is measured in UTF-16 code units. The two differ as soon as the
/// scanned text contains a character that takes more than one code unit, which is
/// the cause of the crash this snippet demonstrates.
var parsedText: String {
    return string.substring(to: string.index(string.startIndex, offsetBy: scanLocation))
}

This code crashes when the string contains characters that take more than one UTF-16 code unit (such as emoji). It turned out that scanLocation returns the number of UTF-16 code units, not the number of characters parsed.

How can I convert scanLocation (in UTF-16 code units) into a character index?

Playground for experimenting:

// The sample text needs a character outside the Basic Multilingual Plane for the
// mismatch to show up: U+1F600 is one Character but two UTF-16 code units.
let scanner = Scanner(string: "Hello\u{1F600}")
scanner.scanString("Hello\u{1F600}", into: nil)
print(scanner.scanLocation) // Prints 7 (UTF-16 code units), although only 6 characters were scanned
Zmey
  • 2,304
  • 1
  • 24
  • 40

1 Answer

2

To obtain character index:

import Foundation

extension Scanner {
    /// The current scan location expressed as a number of characters rather than
    /// as the UTF-16 code-unit count that `scanLocation` reports.
    ///
    /// Evaluates to 0 when `scanLocation` cannot be mapped to a `String.Index`
    /// of `string` (either conversion step below yields `nil`).
    var scanLocationInCharacters: Int {
        let units = string.utf16
        // Step 1: advance through the UTF-16 view, never past its end.
        // Step 2: translate that UTF-16 position into an index of the string itself.
        if let unitIndex = units.index(units.startIndex, offsetBy: scanLocation, limitedBy: units.endIndex),
            let characterIndex = String.Index(unitIndex, within: string) {
            return string.distance(from: string.startIndex, to: characterIndex)
        }
        return 0
    }
}

// U+1F600 occupies two UTF-16 code units but is a single Character, so the two
// locations printed below differ by one.
let scanner = Scanner(string: "Hello\u{1F600}")
scanner.scanString("Hello\u{1F600}", into: nil)

print(scanner.scanLocation) // 7 (UTF-16 code units)
print(scanner.scanLocationInCharacters) // 6 (characters)

To retrieve parsed text:

/// The part of `string` that lies before the current scan location.
///
/// `scanLocation` is applied as an offset into the UTF-16 view and the resulting
/// position is then mapped to a `String.Index`; an empty string is returned when
/// either step yields `nil`.
var parsedText: String {
    let units = string.utf16
    if let unitIndex = units.index(units.startIndex, offsetBy: scanLocation, limitedBy: units.endIndex),
        let cutoff = String.Index(unitIndex, within: string) {
        return string.substring(to: cutoff)
    }
    return ""
}

Bonus: when reporting errors, you'll probably want to print current line and column as well:

/// The 1-based number of the line the scanner is currently on: one plus the
/// number of line feeds in `parsedText`.
var currentLine: Int {
    var lineCount = 1
    // Count line feeds at the unicode-scalar level. Iterating Characters would miss
    // Windows line endings, because "\r\n" forms a single Character that does not
    // compare equal to "\n".
    for scalar in parsedText.unicodeScalars where scalar == "\n" {
        lineCount += 1
    }
    return lineCount
}

/// The 1-based column of the scanner on its current line: one plus the number of
/// characters in `parsedText` that follow its last "\n", or one plus the number of
/// all parsed characters when no "\n" has been parsed yet.
var currentColumn: Int {
    // Evaluate `parsedText` once and reuse the result in both branches.
    let text = parsedText
    guard let lastNewline = text.range(of: "\n", options: .backwards) else {
        return text.characters.count + 1
    }
    return text.distance(from: lastNewline.upperBound, to: text.endIndex) + 1
}
Zmey
  • 2,304
  • 1
  • 24
  • 40