0

I have been querying and inserting data from and to Impala via ODBC, but it is slow (at least compared to a Postgres or SQL Server) and ODBC driver makes possible to execute queries one by one, which is absolutely not recommended as every insert creates a new file in HDFS.

I read through ODBC docs available on Simba site and Cloudera site, but batch operations or direct SQL execution is not mentioned.

This is the code I tried so far

static void Main(string[] args)
{
    string insert = $"INSERT INTO tbl(version, snapshot) " +
                    $"VALUES(?, ?)";

    OdbcConnection connection = new OdbcConnection("DSN=connection");
    connection.Open();

    using (OdbcCommand insertCommand = new OdbcCommand(insert, connection))
    {
        for (int i = 10; i < 15; i++)
        {
            List<OdbcParameter> parameters = new List<OdbcParameter>();

            OdbcParameter versionParam = new OdbcParameter("@version", OdbcType.Text);
            versionParam.Value = "bla" + i;
            parameters.Add(versionParam);

            OdbcParameter snapshotParam = new OdbcParameter("@snapshot", OdbcType.Text);
            snapshotParam.Value = "blabla" + i;
            parameters.Add(snapshotParam);

            insertCommand.Parameters.AddRange(parameters.ToArray());
        }

        string query = insertCommand.CommandText.ToString();
        Console.WriteLine(query);

        //insertCommand.ExecuteReader();
        insertCommand.ExecuteNonQuery();
    }
}

A single row is inserted however 5 should be. What I'm doing wrong?

marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
AndrasCsanyi
  • 3,943
  • 8
  • 45
  • 77

1 Answers1

2

The .ExecuteNonQuery() call needs to be inside the for (....) loop ..... on the other hand, the code to create the parameters must be outside the for () loop - inside the loop, you should only set the parameter's values - not keep re-creating them over and over again ....

Try this code:

static void Main(string[] args)
{
    string insert = $"INSERT INTO tbl(version, snapshot) VALUES(?, ?)";

    OdbcConnection connection = new OdbcConnection("DSN=connection");
    connection.Open();

    using (OdbcCommand insertCommand = new OdbcCommand(insert, connection))
    {
        // *create* the parameters *outside* the "for" loop - only once!
        List<OdbcParameter> parameters = new List<OdbcParameter>();

        OdbcParameter versionParam = new OdbcParameter("@version", OdbcType.Text);
        parameters.Add(versionParam);

        OdbcParameter snapshotParam = new OdbcParameter("@snapshot", OdbcType.Text);
        parameters.Add(snapshotParam);

        insertCommand.Parameters.AddRange(parameters.ToArray());

        for (int i = 10; i < 15; i++)
        {
            // inside the "for" loop - only set the values of the parameters 
            versionParam.Value = "bla" + i;
            snapshotParam.Value = "blabla" + i;

            // ... and then *execute* the query to run the insert!
            string query = insertCommand.CommandText.ToString();
            Console.WriteLine(query);

            insertCommand.ExecuteNonQuery();
        }
    }
}
marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
  • Thanks, I really appreciate it. This piece of code still creates multiple inserts, right? However, it faster at least 3 times than my code. – AndrasCsanyi Dec 27 '18 at 21:30