-1

I use `FTP` and `FTPClient` from the package `org.apache.commons.net.ftp` to download files from an FTP server.

Here is my complete example code:

/**
 * Streams one remote file from an FTP server in small chunks.
 *
 * <p>Typical use: populate the connection fields ({@code ip}, {@code port}, {@code user},
 * {@code passwd}, {@code path}, {@code fileName}), call {@link #connect()}, call
 * {@link #readBytes()} repeatedly until it returns {@code null}, optionally call
 * {@link #validInputComplete()}, and always call {@link #close()} in a {@code finally} block.
 *
 * <p>Instances hold a mutable connection and a shared read buffer; nothing in this class
 * synchronizes access to them.
 */
public class FtpInput {

    private static final Logger LOG = Logger.getLogger(FtpInput.class);

    /** Timeout in milliseconds applied to the connect, control-socket and data-socket operations. */
    private static final int TIMEOUT = 120000;

    /** Prefix of a successful reply to the FTP {@code SIZE} command. */
    private static final String SIZE_COMMAND_REPLY_CODE = "213 ";

    /**
     * FTPClient
     */
    private FTPClient ftpClient;

    /**
     * Size in bytes of the remote file, as reported by the server's SIZE reply.
     */
    private long completeFileSize = 0;

    protected String ip = "";
    protected int port = 21;
    protected String user = "";
    protected String passwd = "";
    protected String path = "";
    protected String fileName = "";

    /**
     * Data stream of the remote file, wrapped so the bytes read can be counted.
     */
    private CountingInputStream is;

    /**
     * the bytes already processed
     */
    private long processedBytesNum;

    /** Reusable read buffer; see {@link #readBytesFromStream(InputStream, byte[])} for aliasing. */
    private byte[] inputBuffer = new byte[1024];

    /**
     * Connects and logs in to the FTP server, changes into {@code path} (creating it if the
     * first change fails), queries the remote file size and opens the data stream for
     * {@code fileName}.
     *
     * @throws RuntimeException if any step fails; the underlying exception is kept as the cause
     *         and the control connection is closed before the exception propagates
     */
    public void connect() {

        this.ftpClient = new FTPClient();
        ftpClient.setRemoteVerificationEnabled(false);
        // The connect timeout is only used while opening the socket, so it has to be
        // configured before connect() is called.
        ftpClient.setConnectTimeout(TIMEOUT);
        try {
            ftpClient.connect(ip, port);
            // The socket timeout can only be applied once the control socket exists.
            ftpClient.setSoTimeout(TIMEOUT);
            if (!ftpClient.login(user, passwd)) {
                throw new IOException("ftp login failed!");
            }
            if (StringUtils.isNotBlank(path)) {
                if (!ftpClient.changeWorkingDirectory(path)) {
                    ftpClient.mkd(path);
                    if (!ftpClient.changeWorkingDirectory(path)) {
                        throw new IOException("ftp change working dir failed! path:" + path);
                    }
                }
            }
            ftpClient.setFileType(FTP.BINARY_FILE_TYPE);
            ftpClient.setDataTimeout(TIMEOUT);
            ftpClient.enterLocalPassiveMode();
            // keep control channel keep-alive when download large file
            // NOTE(review): the Commons Net documentation describes this keep-alive as applying
            // to the blocking transfer methods (e.g. retrieveFile), not to retrieveFileStream(),
            // which is what this class uses -- confirm against the library version in use.
            ftpClient.setControlKeepAliveTimeout(120);
        } catch (Exception e) {
            LOG.error("ftp connect failed", e);
            disconnectQuietly();
            throw new RuntimeException("ftp login failed!", e);
        }
        try {
            // get complete ftp size
            completeFileSize = getFtpFileSize();
            LOG.info(String.format("ftp file size: %d", completeFileSize));
            InputStream ftpis = this.ftpClient.retrieveFileStream(this.fileName);
            if (ftpis == null) {
                // A null stream means the server refused the transfer; fail here instead of
                // wrapping null and hitting a NullPointerException on the first read.
                throw new IOException("cannot fetch source file. reply: " + ftpClient.getReplyString());
            }
            this.is = new CountingInputStream(ftpis);
        } catch (RuntimeException e) {
            disconnectQuietly();
            throw e;
        } catch (Exception e) {
            LOG.error("cannot open ftp file " + fileName, e);
            disconnectQuietly();
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Reads the next chunk of the remote file and updates the processed-byte counter.
     *
     * @return the bytes read, or {@code null} once the end of the stream has been reached
     * @throws NullPointerException if {@link #connect()} has not opened the stream
     * @throws RuntimeException if the underlying read fails
     */
    public byte[] readBytes() {
        byte[] bytes = readBytesFromStream(is, inputBuffer);
        // the bytes processed
        processedBytesNum = is.getCount();
        return bytes;
    }

    /**
     * Performs a single {@code read} on {@code stream} into {@code inputBuffer}.
     *
     * <p>When the read fills the buffer completely, {@code inputBuffer} itself is returned, so
     * the caller must consume it before the next read overwrites it. A partial read is returned
     * as a fresh array of exactly the number of bytes read.
     *
     * @param stream the stream to read from; must not be {@code null}
     * @param inputBuffer the buffer to read into; must be non-null and non-empty
     * @return the bytes read, or {@code null} at end of stream
     * @throws NullPointerException if {@code stream} is {@code null}
     * @throws IllegalArgumentException if {@code inputBuffer} is {@code null} or empty
     * @throws RuntimeException wrapping the {@link IOException} if the read fails
     */
    protected byte[] readBytesFromStream(InputStream stream, byte[] inputBuffer) {
        // Check the reference itself: the previous "stream != null" argument was a boxed
        // boolean, which is never null, so the check could not fail.
        Preconditions.checkNotNull(stream, "InputStream has not been inited yet.");
        Preconditions.checkArgument(inputBuffer != null && inputBuffer.length > 0);
        final int readBytes;
        try {
            readBytes = stream.read(inputBuffer);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        if (readBytes == -1) {
            // Read end.
            return null;
        }
        if (readBytes == inputBuffer.length) {
            // inputBuffer is filled full.
            return inputBuffer;
        }
        if (readBytes > 0) {
            // inputBuffer is not filled full.
            byte[] tmpBytes = new byte[readBytes];
            System.arraycopy(inputBuffer, 0, tmpBytes, 0, readBytes);
            return tmpBytes;
        }
        // A zero-length read is not expected for a non-empty buffer; report it loudly.
        throw new RuntimeException(String.format("readBytesFromStream: readBytes=%s inputBuffer.length=%s",
                readBytes, inputBuffer.length));
    }

    /**
     * Asks the server for the size of {@code fileName} with the FTP {@code SIZE} command.
     *
     * @return the size in bytes parsed from the server reply
     * @throws RuntimeException if the command fails, the reply does not start with the expected
     *         success code, or the size cannot be parsed; the cause is preserved
     */
    private long getFtpFileSize() {
        try {
            ftpClient.sendCommand("SIZE", this.fileName);
            String reply = ftpClient.getReplyString().trim();
            LOG.info(String.format("ftp file %s size reply : %s", fileName, reply));
            Preconditions.checkArgument(reply.startsWith(SIZE_COMMAND_REPLY_CODE),
                    "ftp file size reply:  %s is not success", reply);
            String sizeSubStr = reply.substring(SIZE_COMMAND_REPLY_CODE.length()).trim();
            return Long.parseLong(sizeSubStr);
        } catch (Exception e) {
            LOG.error("cannot get size of ftp file " + fileName, e);
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Closes the data stream, finishes the pending transfer command, logs out and disconnects.
     *
     * <p>Failures are logged and not propagated. Each step runs even if an earlier one failed,
     * so the control socket is always released.
     */
    public void close() {
        // Only a successfully opened stream leaves a transfer reply pending on the control channel.
        final boolean transferStarted = is != null;
        try {
            if (transferStarted) {
                LOG.info(String.format("already read %d bytes from ftp file %s", is.getCount(), fileName));
                try {
                    is.close();
                } catch (Exception e) {
                    LOG.error(String.format("Close ftp input failed:%s,%s", e.getMessage(), e.getCause()), e);
                }
            }
            if (ftpClient != null) {
                try {
                    if (ftpClient.isConnected()) {
                        // Must call completePendingCommand() to finish command.
                        if (transferStarted && !ftpClient.completePendingCommand()) {
                            LOG.error("error happened when complete transfer of ftp");
                        }
                        ftpClient.logout();
                    }
                } catch (Exception e) {
                    LOG.error(String.format("Close ftp input failed:%s,%s", e.getMessage(), e.getCause()), e);
                } finally {
                    disconnectQuietly();
                }
            }
        } finally {
            is = null;
            ftpClient = null;
        }
    }

    /**
     * Verifies that the number of bytes read equals the size reported by the server.
     *
     * @throws IllegalArgumentException if the two values differ
     */
    public void validInputComplete() {
        Preconditions.checkArgument(processedBytesNum == completeFileSize,
                "ftp file transfer is not complete: read %s of %s bytes", processedBytesNum, completeFileSize);
    }

    /**
     * Disconnects the control connection, logging instead of throwing on failure.
     */
    private void disconnectQuietly() {
        if (ftpClient == null) {
            return;
        }
        try {
            ftpClient.disconnect();
        } catch (Exception e) {
            LOG.warn("ftp disconnect failed", e);
        }
    }


    /**
     * Demo entry point: downloads one file chunk by chunk, pausing after every chunk to
     * simulate a slow consumer.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String ip = "***.***.***.****";
        int port = 21;
        String user = "***";
        String passwd = "***";
        String path = "/home/work";

        String fileName = "b.txt";

        FtpInput input = new FtpInput();
        try {
            input.fileName = fileName;
            input.path = path;
            input.ip = ip;
            input.port = port;
            input.user = user;
            input.passwd = passwd;

            // connect to FTP server
            input.connect();
            while (true) {
                // read bytes
                byte[] bytes = input.readBytes();
                if (bytes == null) {
                    break;
                }
                LOG.info("read " + bytes.length + " bytes at :" + new Date(System.currentTimeMillis()));
                // Attention: this is used for simulating the process of writing data into hive table
                // it maybe consume more than 1 minute;
                Thread.sleep(3000);
            }
            input.validInputComplete();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can see the interruption.
            Thread.currentThread().interrupt();
            LOG.error("ftp download interrupted", e);
        } catch (Exception e) {
            LOG.error("ftp download failed", e);
        } finally {
            input.close();
        }
    }

}

here is the exception message:

java.net.SocketTimeoutException: Read timed out
or
java.net.SocketException: Connection reset

The exception is thrown at the `stream.read(inputBuffer)` call in the method `readBytesFromStream`.

At first, I thought it was probably caused by writing into the Hive table too slowly, so that the FTP server closed the connection. But actually, the speed of writing into the Hive table is fast enough.

Now I need your help: how can I fix this problem?

Martin Prikryl
  • 188,800
  • 56
  • 490
  • 992
yi jiang
  • 216
  • 1
  • 13
  • 1) At what point exactly do you get the exception? When calling the `retrieveFileStream`? Or while reading? 2) Do you manage to read anything? And if you do, is the amount the same every time or random? 3) Show us your "reading" code. 4) In the title you wrote "always throw", in the text you wrote "may throw", what is correct? 5) Can you download the file using any standalone FTP client on the same machine, you run the Java code? – Martin Prikryl Sep 05 '16 at 09:56
  • Sorry for my imprecise wording: the exception is thrown when I call the InputStream.read method. It may happen at any time during the transfer. If I use a standalone FTP client (not the Java API), I can download the file. – yi jiang Sep 05 '16 at 09:59
  • i add the read code in my question. – yi jiang Sep 05 '16 at 10:26
  • How often the problem happens? Can you capture an FTP traffic (or show us FTP server log) for a session that succeeds and a session that fails? – Martin Prikryl Sep 05 '16 at 10:36
  • Do you call `readBytesFromStream` repeatedly? Does the problem happen on the first call or in any call? If you call it repeatedly, show us the code that does that! Or actually, you should create [mcve]. – Martin Prikryl Sep 05 '16 at 10:39
  • I add an example code to my problem. – yi jiang Sep 06 '16 at 03:11
  • If you remove the `Thread.sleep`, does the problem still happen? – Martin Prikryl Sep 06 '16 at 06:09
  • sometime, I think "read time out" exception maybe reduced by increasing the time out setting of socket through FTPClient.setSoTimeOut and setDataTimeout, but i can't completely prevent the occurrence. As for "connection reset" exception, I have no idea. – yi jiang Sep 06 '16 at 06:52
  • Sorry, but I'm not sure if that's an answer to my last question or what. So again, can you make the code working by removing the `Thread.sleep` call, Yes or no? – Martin Prikryl Sep 06 '16 at 07:02
  • Thread.sleep is used for simulating the process of writing data into hive table. this is an example code, if i remove it, the code works well. But i am not sure whether the Thread.sleep have the same effect compared with the hive operation. – yi jiang Sep 06 '16 at 08:12
  • So how long in general does it take before the exception occurs? – Martin Prikryl Sep 06 '16 at 08:21
  • Do you mean the time between each reading data? It is not fixed, sometime is about half an hour, sometime is 1 minute. – yi jiang Sep 06 '16 at 08:46
  • I mean the time between the download starts and the error occurs. – Martin Prikryl Sep 06 '16 at 08:52
  • Your code is flawed. Your various read APIs are specified in such a way that they have to call `System.arrayCopy()`, and/or set instance variables to values that can't be returned by the method signatures. For the right way to do it, you don't have to look any further than `java.io.InputStream`. – user207421 Sep 06 '16 at 10:04
  • @MartinPrikryl, the time you mean is not fixed, it may happen at the begin or end for transfer. – yi jiang Sep 06 '16 at 11:19
  • @EJP, do you mean that I should not use InputStream.read(buffer) api? – yi jiang Sep 06 '16 at 11:22

1 Answer

1

From your comments, it looks like it can take hours before you finish downloading the file.

You cannot reasonably expect an FTP server to wait for you for hours to finish the transfer. Particularly if you are not transferring anything most of the time. You waste server resources and most servers will protect themselves against such abuse.

Your design is flawed.

You should redesign your application to first fully download the file; and import the file only after the download finishes.

Martin Prikryl
  • 188,800
  • 56
  • 490
  • 992
  • I got what you mean. However, if I download the full file, it will cost extra storage. – yi jiang Sep 06 '16 at 11:26
  • Sure, there's an extra cost. But what you are trying to do now is to selfishly move that extra cost to the server side and the network. And the server won't like it. – Martin Prikryl Sep 06 '16 at 11:41
  • Do you need all file contents for the import or do you need some parts only? + Did you consider compressing the file on the fly, while downloading and storing? – Martin Prikryl Sep 06 '16 at 11:41
  • Yes, I need all file contents and now i used Disruptor RingBuffer as the SendBuffer to store data temporarily before I consume the data after reading from FTP or other data source. – yi jiang Sep 06 '16 at 11:46