
I have a problem when executing this Java code to import a table from MySQL into Hive:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import com.cloudera.sqoop.Sqoop;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.SqoopOptions.FileLayout;
import com.cloudera.sqoop.tool.ImportTool;
import com.mysql.jdbc.*;

public class SqoopExample {
    public static void main(String[] args) throws Exception {

        // Load the MySQL JDBC driver explicitly
        String driver = "com.mysql.jdbc.Driver";
        Class.forName(driver).newInstance();

        // Point the Hadoop configuration at the cluster's site files
        Configuration config = new Configuration();
        config.addResource(new Path("/home/socio/hadoop/etc/hadoop/core-site.xml"));
        config.addResource(new Path("/home/socio/hadoop/etc/hadoop/hdfs-site.xml"));

        // This is the call that throws the exception below
        FileSystem dfs = FileSystem.get(config);

        // Configure the Sqoop import programmatically
        SqoopOptions options = new SqoopOptions();

        options.setDriverClassName(driver);
        options.setConf(config);
        options.setHiveTableName("tlinesuccess");
        options.setConnManagerClassName("org.apache.sqoop.manager.GenericJdbcManager");
        options.setConnectString("jdbc:mysql://dba-virtual-machine/test");
        options.setHadoopMapRedHome("/home/socio/hadoop");
        options.setHiveHome("/home/socio/hive");
        options.setTableName("textlines");
        options.setColumns(new String[] {"line"});
        options.setUsername("socio");
        options.setNumMappers(1);
        options.setJobName("Test Import");
        options.setOverwriteHiveTable(true);
        options.setHiveImport(true);
        options.setFileLayout(FileLayout.TextFile);

        // Run the import; ImportTool.run returns 0 on success
        int ret = new ImportTool().run(options);
    }
}

Result:

Exception in thread "main" java.io.IOException: No FileSystem for scheme: hdfs
    at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2385)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2392)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:89)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2431)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2413)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:368)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:167)
    at SqoopExample.main(SqoopExample.java:22)

I should point out that the equivalent shell command works:

    sqoop import --connect jdbc:mysql://dba-virtual-machine/test --username socio --table textlines --columns line --hive-import

I can import from MySQL with the shell using that command; the problem is only with the Java code.

Any help/ideas would be greatly appreciated.

Thanks

  • Could you add the (failing) code as well? – vhu Jun 30 '14 at 08:40
  • I believe the problem is with the conf: when I remove the config.addResource(...) lines, it works, but the files end up on the local file system – poué Jun 30 '14 at 12:50

3 Answers


Add this plugin to your Maven build when making the jar; it merges all the file system registrations into one (see the note after the snippet). Also add the hadoop-hdfs and hadoop-client dependencies.

        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>1.5</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>

                    <configuration>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                        <shadedArtifactAttached>true</shadedArtifactAttached>
                        <shadedClassifierName>allinone</shadedClassifierName>
                        <artifactSet>
                            <includes>
                                <include>*:*</include>
                            </includes>
                        </artifactSet>
                        <transformers>
                            <transformer
                                implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>reference.conf</resource>
                            </transformer>
                            <transformer
                                implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                            </transformer>
                            <transformer 
                            implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer">
                            </transformer>
                        </transformers>
                    </configuration>
                </execution>
            </executions>
        </plugin>
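The ServicesResourceTransformer is the part that matters for this particular error: it concatenates the META-INF/services files from all the merged jars, so the combined org.apache.hadoop.fs.FileSystem service file lists the hdfs implementation alongside the local one, and the "No FileSystem for scheme: hdfs" lookup succeeds.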
– Harish Pathak

The HDFS file system implementation is defined in the library hadoop-hdfs-2.0.0-cdhX.X.X.jar. If you are executing this as a standalone Java program, you need to add this library to the classpath.

Alternatively, this library is already on the hadoop classpath: create a jar file and execute it using the hadoop command.
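A rough sketch of both options (the jar name sqoop-example.jar is a placeholder; SqoopExample is the main class from the question):

    # Option 1: run directly, adding the Hadoop jars to the classpath
    java -cp "sqoop-example.jar:$(hadoop classpath)" SqoopExample

    # Option 2: let the hadoop launcher build the classpath for you
    hadoop jar sqoop-example.jar SqoopExample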

– SachinJose

If you are using Maven, this is also a good solution:

https://stackoverflow.com/a/28135140/3451801

Basically you need to add hadoop-hdfs as a dependency in your pom, as sketched below.
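A minimal sketch of that dependency (${hadoop.version} is a placeholder property; set it to match the Hadoop version on your cluster):

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>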

– eric-haibin-lin