I am just trying to use a retrained Inception model with TensorFlowSharp in Unity. The retrained model was prepared with optimize_for_inference and works like a charm in Python, but it is pretty inaccurate in C#.
The code works like this: first I get the picture.
// Encode the current webcam texture (_texture) as JPEG bytes.
var pic = _texture.EncodeToJpg();
// Queue the encoded picture for the object detection thread.
_detectedObjects.addTens(pic);
After that, a thread handles each collected picture:
/// <summary>
/// Runs the loaded model on one encoded picture and logs the label with the
/// highest score together with that score as a percentage.
/// </summary>
/// <param name="picture">
/// Encoded image bytes; they are passed unchanged to
/// <c>ImageUtil.CreateTensorFromImageFile</c>, which decodes them as JPEG.
/// </param>
public void HandlePicture(byte[] picture)
{
    // TFTensor is disposable; without the using block every processed picture
    // would leave its input tensor alive until (if ever) it is finalized.
    using (var tensor = ImageUtil.CreateTensorFromImageFile(picture))
    {
        var runner = session.GetRunner();
        runner.AddInput(g_input, tensor).Fetch(g_output);
        var output = runner.Run();
        try
        {
            // Exactly one output was fetched; row 0 of it holds the scores
            // for the single picture in the batch.
            var probabilities = ((float[][])output[0].GetValue(jagged: true))[0];

            // Plain arg-max over the scores. Starting at 0 means index 0 is
            // reported when no score is strictly positive.
            var bestIdx = 0;
            float best = 0;
            for (int r = 0; r < probabilities.Length; r++)
            {
                if (probabilities[r] > best)
                {
                    bestIdx = r;
                    best = probabilities[r];
                }
            }

            Debug.Log("Tensorflow thinks this is: " + labels[bestIdx] + " Prob : " + best * 100);
        }
        finally
        {
            // The fetched tensors are owned by the caller of Run(); release
            // them once the scores have been copied into managed arrays.
            foreach (var fetched in output)
            {
                fetched.Dispose();
            }
        }
    }
}
So my guess is:
1. It has something to do with retrained graphs (because I can't find any application/test where one is used and working).
2. It has something to do with how I transform the picture into a tensor?! (If that is wrong I could use some help there; the code is further down.)
To transform the picture I am also using a graph, as in the TensorFlowSharp example:
/// <summary>
/// Helpers that turn JPEG-encoded image bytes into a normalized input tensor
/// for the image classification model.
/// </summary>
public static class ImageUtil
{
    /// <summary>
    /// Decodes JPEG-encoded bytes and returns a resized, normalized tensor
    /// with a leading batch dimension of 1.
    /// </summary>
    /// <param name="contents">JPEG-encoded image bytes.</param>
    /// <param name="destinationDataType">Element type of the returned tensor.</param>
    /// <returns>The normalized image tensor; the caller owns it and should dispose it.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="contents"/> is null.</exception>
    public static TFTensor CreateTensorFromImageFile(byte[] contents, TFDataType destinationDataType = TFDataType.Float)
    {
        if (contents == null)
        {
            throw new System.ArgumentNullException("contents");
        }

        TFGraph graph;
        TFOutput input, output;

        // Construct a graph to normalize the image.
        ConstructGraphToNormalizeImage(out graph, out input, out output, destinationDataType);

        // The graph, the session and the input tensor are only needed for this
        // one run, so all three are released before returning. The result
        // tensor returned by Run() is a separate object and stays valid.
        using (graph)
        using (var session = new TFSession(graph))
        // DecodeJpeg uses a scalar String-valued tensor as input.
        using (var tensor = TFTensor.CreateString(contents))
        {
            var normalized = session.Run(
                inputs: new[] { input },
                inputValues: new[] { tensor },
                outputs: new[] { output });
            return normalized[0];
        }
    }

    // The model takes as input the image described by a Tensor in a very
    // specific normalized format (a particular image size, shape of the input
    // tensor, normalized pixel values etc.).
    //
    // This function constructs a graph of TensorFlow operations which takes as
    // input a JPEG-encoded string and returns a tensor suitable as input to
    // the model. The caller owns the returned graph and must dispose it.
    private static void ConstructGraphToNormalizeImage(out TFGraph graph, out TFOutput input, out TFOutput output, TFDataType destinationDataType = TFDataType.Float)
    {
        // Preprocessing constants:
        //
        // - Images are scaled to W x H = 299 x 299 pixels.
        // - The colors, represented as R, G, B in 1-byte each, are converted
        //   to float using (value - Mean) / Scale.
        //
        // NOTE(review): Scale was 1, which feeds values in [-128, 127].
        // Inception v3 graphs retrained with TensorFlow's retrain script are
        // normally fed (value - 128) / 128, i.e. values in roughly [-1, 1];
        // confirm against the preprocessing used when the model was trained.
        const int W = 299;
        const int H = 299;
        const float Mean = 128;
        const float Scale = 128;

        graph = new TFGraph();
        input = graph.Placeholder(TFDataType.String);

        // Decode to 3 channels and convert the bytes to float.
        var decoded = graph.Cast(
            graph.DecodeJpeg(contents: input, channels: 3), DstT: TFDataType.Float);

        // Add a leading batch dimension so the image becomes a batch of one.
        var batched = graph.ExpandDims(
            input: decoded,
            dim: graph.Const(0, "make_batch"));

        // ResizeBilinear takes its size as { new_height, new_width }.
        var resized = graph.ResizeBilinear(
            images: batched,
            size: graph.Const(new int[] { H, W }, "size"));

        var centered = graph.Sub(
            x: resized,
            y: graph.Const(Mean, "mean"));

        var scaled = graph.Div(
            x: centered,
            y: graph.Const(Scale, "scale"));

        output = graph.Cast(scaled, destinationDataType);
    }
}