I am trying to build a console app with AutoML.NET for a regression problem. Although my code seems to work, it never completes and just hangs at the call to the RunAsync() method. The libraries I use are listed below, and the data can be found at the (Data) link below.
Libraries:
Microsoft.ML = 2.0.1
Microsoft.ML.AutoML = 0.20.1
Microsoft.ML.LightGbm = 2.0.1
Note: I have created a separate folder called Data in my project where I store the data.
using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
//using Common;
// Initialize MLContext
using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.LightGbm;
using Microsoft.ML.Transforms;
using static Microsoft.ML.DataOperationsCatalog;
//using static Microsoft.ML.Transforms.NormalizingEstimator;
//using Microsoft.ML.Transforms;
namespace TaxiFairPrediction
{
class Program
{
static async Task Main(string[] args)
{
Console.WriteLine("--------- Preparing AutoML experiment ---------");
var mlContext = new MLContext(seed: 1);
// Load and split data
//Console.WriteLine("Loading data...");
var projectDirectory = Directory.GetParent(Environment.CurrentDirectory).Parent.Parent.FullName;
Console.WriteLine($"PROJECT DIR :{projectDirectory}");
/// Read & Load Train/Test Data for the TaxiFareAmount Prediction
var TrainDataRelativePath = Path.Combine(projectDirectory, "Data", "taxi-fare-train.csv");
var TestDataRelativePath = Path.Combine(projectDirectory, "Data", "taxi-fare-test.csv");
var TrainData = mlContext.Data.LoadFromTextFile<TaxiFare>(TrainDataRelativePath, hasHeader: true, separatorChar: ',');
var TestData = mlContext.Data.LoadFromTextFile<TaxiFare>(TestDataRelativePath, hasHeader: true, separatorChar: ',');
var TrainSchema = TrainData.Schema;
Console.WriteLine($"Input schema: {TrainSchema}");
///
// Split data
//var testTrainDataSet = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
//Transform
Console.WriteLine("Transforming data...");
// ---------------------- START example ----------------------
// Infer column information
Console.WriteLine("Step #1 ----> Infering column information");
ColumnInferenceResults columnInference =
mlContext.Auto().InferColumns(TrainDataRelativePath,
labelColumnName: "fare_amount",
groupColumns: false);
//Define pipeline
Console.WriteLine("Step #2 ----> Creating pipeline");
SweepablePipeline pipeline =
mlContext.Auto().Featurizer(TrainData,
columnInformation: columnInference.ColumnInformation)
.Append(mlContext.Auto().Regression(labelColumnName: columnInference.ColumnInformation.LabelColumnName));
// Create AutoML experiment
Console.WriteLine("Step #3 ----> Creating experiment");
AutoMLExperiment experiment = mlContext.Auto().CreateExperiment();
// Configure experiment
experiment
.SetPipeline(pipeline)
.SetRegressionMetric(RegressionMetric.RSquared,
labelColumn: columnInference.ColumnInformation.LabelColumnName)
.SetTrainingTimeInSeconds(10)
.SetRandomSearchTuner()
.SetDataset(TrainData);
// Log experiment trials
//var monitor = new AutoMLMonitor(pipeline);
//experiment.SetMonitor(monitor);
// Run experiment
Console.WriteLine("Step #4 ----> Run experiment");
//var cts = new CancellationTokenSource();
//TrialResult experimentResults = await experiment.RunAsync(cts.Token);
TrialResult experimentResults = await experiment.RunAsync();
// Done Training
Console.WriteLine("Step #5 ----> Training Completed");
// Get best model
var bestRun = experimentResults.Model;
// Get the name of the best model
string bestModelName = experimentResults.Model.GetType().FullName.ToString();
Console.WriteLine($"Best Model Name: {bestModelName}");
Console.WriteLine($"Rsquared on train data: {experimentResults.Metric.ToString()}");
IDataView testDataViewWithBestScore = bestRun.Transform(TestData);
RegressionMetrics testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore,
labelColumnName: "fare_amount");
Console.WriteLine($"Rsquared on validation data: {testMetrics.RSquared}");
// ---------------------- END example ----------------------
//return trainedModel;
Console.ReadKey();
}
public class TaxiFare
{
[LoadColumn(0)]
public string vendor_id;
[LoadColumn(1)]
public float rate_code;
[LoadColumn(2)]
public float passenger_count;
[LoadColumn(3)]
public float trip_time_in_secs;
[LoadColumn(4)]
public float trip_distance;
[LoadColumn(5)]
public string payment_type;
[LoadColumn(6)]
public float fare_amount;
}
}
}
Any ideas?