I used FTP and FTPClient in package 'org.apache.commons.net.ftp' to download files from FTP server.
Here is my total example code
public class FtpInput {
private static final Logger LOG = Logger.getLogger(FtpInput.class);
private static final int TIMEOUT = 120000;
private static final String SIZE_COMMAND_REPLY_CODE = "213 ";
/**
* FTPClient
*/
private FTPClient ftpClient;
/**
* FTP size
*/
private long completeFileSize = 0;
protected String ip = "";
protected int port = 21;
protected String user = "";
protected String passwd = "";
protected String path = "";
protected String fileName = "";
/**
* count input bytes
*/
private CountingInputStream is;
/**
* the bytes already processed
*/
private long processedBytesNum;
private byte[] inputBuffer = new byte[1024];
/**
* connect to ftp server and fetch inputStream
*/
public void connect() {
this.ftpClient = new FTPClient();
ftpClient.setRemoteVerificationEnabled(false);
try {
ftpClient.connect(ip, port);
if (!ftpClient.login(user, passwd)) {
throw new IOException("ftp login failed!");
}
if (StringUtils.isNotBlank(path)) {
if (!ftpClient.changeWorkingDirectory(path)) {
ftpClient.mkd(path);
if (!ftpClient.changeWorkingDirectory(path)) {
throw new IOException("ftp change working dir failed! path:" + path);
}
}
}
ftpClient.setFileType(FTP.BINARY_FILE_TYPE);
ftpClient.setSoTimeout(TIMEOUT);
ftpClient.setConnectTimeout(TIMEOUT);
ftpClient.setDataTimeout(TIMEOUT);
ftpClient.enterLocalPassiveMode();
// keep control channel keep-alive when download large file
ftpClient.setControlKeepAliveTimeout(120);
} catch (Throwable e) {
e.printStackTrace();
throw new RuntimeException("ftp login failed!", e);
}
// get complete ftp size
completeFileSize = getFtpFileSize();
LOG.info(String.format("ftp file size: %d", completeFileSize));
try {
InputStream ftpis = this.ftpClient.retrieveFileStream(this.fileName);
if (ftpis == null) {
LOG.error("cannot fetch source file.");
}
this.is = new CountingInputStream(ftpis);
} catch (Throwable e) {
e.printStackTrace();
throw new RuntimeException(e.getMessage());
}
}
/**
* readBytes
*
* @return
*/
public byte[] readBytes() {
byte[] bytes = readBytesFromStream(is, inputBuffer);
// the bytes processed
processedBytesNum = is.getCount();
return bytes;
}
/**
* readBytesFromStream
*
* @param stream
* @param inputBuffer
* @return
*/
protected byte[] readBytesFromStream(InputStream stream, byte[] inputBuffer) {
Preconditions.checkNotNull(stream != null, "InputStream has not been inited yet.");
Preconditions.checkArgument(inputBuffer != null && inputBuffer.length > 0);
int readBytes;
try {
readBytes = stream.read(inputBuffer);
} catch (IOException e) {
throw new RuntimeException(e);
}
if (readBytes == inputBuffer.length) {
// inputBuffer is filled full.
return inputBuffer;
} else if (readBytes > 0 && readBytes < inputBuffer.length) {
// inputBuffer is not filled full.
byte[] tmpBytes = new byte[readBytes];
System.arraycopy(inputBuffer, 0, tmpBytes, 0, readBytes);
return tmpBytes;
} else if (readBytes == -1) {
// Read end.
return null;
} else {
// may other situation happens?
throw new RuntimeException(String.format("readBytesFromStream: readBytes=%s inputBuffer.length=%s",
readBytes, inputBuffer.length));
}
}
/**
* fetch the byte size of remote file size
*/
private long getFtpFileSize() {
try {
ftpClient.sendCommand("SIZE", this.fileName);
String reply = ftpClient.getReplyString().trim();
LOG.info(String.format("ftp file %s size reply : %s", fileName, reply));
Preconditions.checkArgument(reply.startsWith(SIZE_COMMAND_REPLY_CODE),
"ftp file size reply: %s is not success", reply);
String sizeSubStr = reply.substring(SIZE_COMMAND_REPLY_CODE.length());
long actualFtpSize = Long.parseLong(sizeSubStr);
return actualFtpSize;
} catch (Throwable e) {
e.printStackTrace();
throw new RuntimeException(e.getMessage());
}
}
public void close() {
try {
if (is != null) {
LOG.info(String.format("already read %d bytes from ftp file %s", is.getCount(), fileName));
is.close();
}
if (ftpClient != null) {
// Must call completePendingCommand() to finish command.
boolean isSuccessTransfer = ftpClient.completePendingCommand();
if (!isSuccessTransfer) {
LOG.error("error happened when complete transfer of ftp");
}
ftpClient.logout();
ftpClient.disconnect();
}
} catch (Throwable e) {
e.printStackTrace();
LOG.error(String.format("Close ftp input failed:%s,%s", e.getMessage(), e.getCause()));
} finally {
is = null;
ftpClient = null;
}
}
public void validInputComplete() {
Preconditions.checkArgument(processedBytesNum == completeFileSize, "ftp file transfer is not complete");
}
/**
* main
*
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
String ip = "***.***.***.****";
int port = 21;
String user = "***";
String passwd = "***";
String path = "/home/work";
String fileName = "b.txt";
FtpInput input = new FtpInput();
try {
input.fileName = fileName;
input.path = path;
input.ip = ip;
input.port = port;
input.user = user;
input.passwd = passwd;
// connect to FTP server
input.connect();
while (true) {
// read bytes
byte[] bytes = input.readBytes();
if (bytes == null) {
break;
}
LOG.info("read " + bytes.length + " bytes at :" + new Date(System.currentTimeMillis()));
// Attention: this is used for simulating the process of writing data into hive table
// it maybe consume more than 1 minute;
Thread.sleep(3000);
}
input.validInputComplete();
} catch (Exception e) {
e.printStackTrace();
} finally {
input.close();
}
}
}
here is the exception message:
java.net.SocketTimeoutException: Read timed out
or
java.net.SocketException: Connection reset
at stream.readBytes in method readBytesFromStream
At first, i think it probably caused by writing into hive table slowly, and then the FTP Server closed the connection. But actually, the speed of writing into hive table is fast enough.
Now, i need your help, how can i fix this problem.