Just checking for duplicates will let you prove obvious cases of non legal fills.
I'm afraid you have to try and solve the Sudoku board to prove a partial fill to be legal, that is to have solutions. Finding a single solution is enough, but there might be other solutions. Finding all solutions is not much harder.
Proving that there are no solutions is easy for the case you provide, a single step leads to failure (all possible solutions for cell A7 produce a duplicate) but may be more complex in the general case, as it requires mutiple steps.
A brute force approach, such as trying every possibility for every empty square and recursing, has a horrible complexity. You can find shortcut approaches on the Internet and investigate them with you own code.
Edit: When in doubt, try brute force! Sudoku may be NP-Complete in the general case, but in practice it does not seem to diverge significantly. I reflected on my initial analysis above and decided to give it a try with brute force. I wrote a small program to solve Sudokus given on the command line. Even on my old MacBook Air, it solves everything I tried in less than a few milliseconds.
Edit again: I updated the code with a generic version that uses a better algorithm and is close to 80 times faster.
Here is the code:
/* Generic Sudoku solver by Charles Gordon */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef GEN
#define GEN 3
#endif
#define SIZE (GEN*GEN)
typedef unsigned char u8_t;
typedef unsigned long long llu_t;
static int print_count = 0, print_solutions = 1;
/* utility arrays to dispatch cell number to signature arrays */
static int rowsig[SIZE * SIZE], colsig[SIZE * SIZE], regsig[SIZE * SIZE];
static void sigcell_init() {
for (int i = 0, row = 0; row < SIZE; row++) {
for (int col = 0; col < SIZE; col++, i++) {
rowsig[i] = row;
colsig[i] = SIZE + col;
regsig[i] = SIZE + SIZE + (row / GEN) * GEN + (col / GEN);
}
}
}
static void print_board(const int *board, const char *header) {
printf("%s:\n", header);
for (int i = 0, row = 0; row < SIZE; row++) {
for (int col = 0; col < SIZE; col++, i++)
printf("%*.0d", 2 + (SIZE > 9) + (SIZE > 99), board[i]);
putchar('\n');
}
}
static int validate_board(const int *board, u8_t sigs[3 * SIZE][SIZE]) {
memset(sigs, 0, 3 * SIZE * SIZE * sizeof(sigs[0][0]));
for (int i = 0; i < SIZE * SIZE; i++) {
int val = board[i];
if (val == 0)
continue;
if (val < 0 || val > SIZE)
return 2; /* found invalid value */
val -= 1;
if (sigs[rowsig[i]][val] | sigs[colsig[i]][val] | sigs[regsig[i]][val])
return 1; /* found duplicate */
sigs[rowsig[i]][val] = sigs[colsig[i]][val] = sigs[regsig[i]][val] = 1;
}
return 0; /* board is valid */
}
static llu_t try_board(int *board, u8_t sigs[3 * SIZE][SIZE], int *empty, int emp) {
llu_t count, total = 0;
int n, cell;
u8_t *rowsigp, *colsigp, *regsigp;
if (emp == 0) {
if (print_solutions)
print_board(board, "found a board solution");
return 1;
}
cell = *empty; /* next cell to try and populate */
rowsigp = sigs[rowsig[cell]];
colsigp = sigs[colsig[cell]];
regsigp = sigs[regsig[cell]];
for (n = 0; n < SIZE; n++) {
/* check if value is possible */
if ((rowsigp[n] | colsigp[n] | regsigp[n]) == 0) {
rowsigp[n] = colsigp[n] = regsigp[n] = 1;
board[cell] = n + 1;
if ((count = try_board(board, sigs, empty + 1, emp - 1)) > 0) {
total += count;
if (!print_count)
break;
}
rowsigp[n] = colsigp[n] = regsigp[n] = 0;
board[cell] = 0;
}
}
return total;
}
int main(int argc, char *argv[]) {
int board[SIZE * SIZE]; /* board values: empty=0 */
u8_t sigs[3 * SIZE][SIZE]; /* signatures for row, col and regions */
int empty[SIZE * SIZE]; /* list of empty cells */
int i, n;
llu_t count = 0;
sigcell_init();
/* initialize board */
for (i = 1, n = 0; i < argc; i++) {
if (!strcmp(argv[i], "-a")) {
print_count = 1;
print_solutions = 0;
continue;
}
if (n < SIZE * SIZE)
board[n++] = atoi(argv[i]);
}
while (n < SIZE * SIZE)
board[n++] = 0;
print_board(board, "initial board");
if (validate_board(board, sigs)) {
printf("board is invalid\n");
return 1;
}
/* compute list of empty cells */
for (i = n = 0; i < SIZE * SIZE; i++) {
if (board[i] == 0)
empty[n++] = i;
}
if ((count = try_board(board, sigs, empty, n)) == 0) {
printf("board does not have solutions\n");
return 1;
}
if (print_count) {
printf("total board solutions: %llu\n", count);
}
return 0;
}
Passing a command line option of -a
makes the program look of all solutions and print the total number found. As expected, the timings grow exponentially with the number of empty cells. But they only become significant when the board is less than half full and has many solutions.