As a part of my React (front) / Express (backend) project, I have a web crawler that crawls buffer, decodes then obtain desired data using cheerio.
(Just to clarify, I am not using any data obtained for commercial use, but only for a personal project.)
server.js:
require("dotenv").config();
const express = require("express");
const cors = require("cors");
const http = require("http");
const axios = require("axios");
const cheerio = require("cheerio");
const app = express();
app.set("port", process.env.PORT || 3001);
app.use(cors());
app.use(
express.json({
verify: (request, _, buffer) => {
request.buffer = buffer;
},
})
);
app.get("/", (_, response) => {
response.send("index");
});
app.get("/:menu_id", async (request, response) => {
try {
const buffer = await axios.get("https://cafe.naver.com/ArticleList.nhn", {
params: {
"search.clubid": 10050146,
"search.menuid": request.params.menu_id,
"search.boardType": "L",
userDisplay: 10,
},
headers: {
"Content-Type": "application/json",
},
responseType: "arraybuffer",
});
const decoder = new TextDecoder("EUC-KR");
const content = decoder.decode(buffer.data);
const $ = cheerio.load(content);
const list = $("div.article-board:nth-of-type(4)>table>tbody>tr");
let articles = [];
list.each((_, tag) => {
const url = `https://cafe.naver.com${$(tag).find("a.article").attr("href")}`;
const number = $(tag).find("div.inner_number").text();
const title = $(tag).find("a.article").contents().last().text().trim();
const author = $(tag).find(".p-nick>a").text();
const date = $(tag)
.find("td.td_date")
.text()
.replace(/(\d{4}).(\d{2}).(\d{2})./g, "$2-$3");
articles.push({ number, title, author, date, url });
});
response.send(articles);
} catch (error) {
response.send(error);
}
});
const server = http.createServer(app);
server.listen(app.get("port"), () => {
console.log(`App is listening to port: ${app.get("port")}`);
});
Then I have setup a proxy using http-proxy-middleware on front-end.
setupProxy.js:
const { createProxyMiddleware } = require("http-proxy-middleware");
module.exports = (app) => {
app.use(
createProxyMiddleware("/api", {
target: "http://localhost:3001",
changeOrigin: true,
pathRewrite: {
"^/api": "",
},
})
);
};
Articles.js:
import React, { useEffect, useState } from "react";
import axios from "axios";
import Article from "./Article";
import Loading from "./Loading";
const Articles = (props) => {
const [articles, setArticles] = useState();
useEffect(() => {
const getArticles = () => {
axios
.get(`/api/naver/cafe/${props.menu.id}`)
.then((response) => {
console.log(response);
setArticles(response.data);
})
.catch((error) => {
console.log(error);
});
};
getArticles();
}, []);
return (
<div className="articles">
<div className="menuTitle">{props.menu.title}</div>
{articles ? (
<ul className="menuContent">
{Object.entries(articles).map(([key, article]) => (
<Article article={article} key={key} />
))}
</ul>
) : (
<Loading />
)}
</div>
);
};
export default Articles;
So, this actually works and fetches data as I expected, but then when the server is left running idle, doing nothing, the client fails to fetch data from the server on page refresh (or maybe on a new session) and spits the long, long error below:
/project/node_modules/axios/dist/node/axios.cjs:725
AxiosError.call(axiosError, error.message, code, config, request, response);
^
AxiosError: read ECONNRESET
at AxiosError.from (/project/node_modules/axios/dist/node/axios.cjs:725:14)
at RedirectableRequest.handleRequestError (/project/node_modules/axios/dist/node/axios.cjs:2467:25)
at RedirectableRequest.emit (node:events:513:28)
at eventHandlers.<computed> (/project/node_modules/follow-redirects/index.js:14:24)
at ClientRequest.emit (node:events:513:28)
at TLSSocket.socketErrorListener (node:_http_client:494:9)
at TLSSocket.emit (node:events:513:28)
at emitErrorNT (node:internal/streams/destroy:151:8)
at emitErrorCloseNT (node:internal/streams/destroy:116:3)
at process.processTicksAndRejections (node:internal/process/task_queues:82:21) {
syscall: 'read',
code: 'ECONNRESET',
errno: -104,
config: {
transitional: {
silentJSONParsing: true,
forcedJSONParsing: true,
clarifyTimeoutError: false
},
adapter: [Function: httpAdapter],
transformRequest: [ [Function: transformRequest] ],
transformResponse: [ [Function: transformResponse] ],
timeout: 0,
xsrfCookieName: 'XSRF-TOKEN',
xsrfHeaderName: 'X-XSRF-TOKEN',
maxContentLength: -1,
maxBodyLength: -1,
env: {
FormData: [Function: FormData] {
LINE_BREAK: '\r\n',
DEFAULT_CONTENT_TYPE: 'application/octet-stream'
},
Blob: [class Blob]
},
validateStatus: [Function: validateStatus],
headers: AxiosHeaders {
'User-Agent': 'axios/1.1.3',
'Accept-Encoding': 'gzip, deflate, br',
[Symbol(defaults)]: { Accept: 'application/json, text/plain, */*' }
},
params: {
'search.clubid': 10050146,
'search.menuid': '385',
'search.boardType': 'L',
userDisplay: 10
},
responseType: 'arraybuffer',
method: 'get',
url: 'https://cafe.naver.com/ArticleList.nhn',
data: undefined
},
request: <ref *3> Writable {
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: false,
needDrain: false,
ending: false,
ended: false,
finished: false,
destroyed: false,
decodeStrings: true,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: true,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 0,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: true,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
_events: [Object: null prototype] {
response: [Function: handleResponse],
error: [Function: handleRequestError],
socket: [Function: handleRequestSocket]
},
_eventsCount: 3,
_maxListeners: undefined,
_options: {
maxRedirects: 21,
maxBodyLength: Infinity,
protocol: 'https:',
path: '/ArticleList.nhn?search.clubid=10050146&search.menuid=385&search.boardType=L&userDisplay=10',
method: 'GET',
headers: [Object: null prototype] {
Accept: 'application/json, text/plain, */*',
'User-Agent': 'axios/1.1.3',
'Accept-Encoding': 'gzip, deflate, br'
},
agents: { http: undefined, https: undefined },
auth: undefined,
beforeRedirect: [Function: dispatchBeforeRedirect],
beforeRedirects: { proxy: [Function: beforeRedirect] },
hostname: 'cafe.naver.com',
port: '',
agent: undefined,
nativeProtocols: {
'http:': {
_connectionListener: [Function: connectionListener],
METHODS: [
'ACL', 'BIND', 'CHECKOUT',
'CONNECT', 'COPY', 'DELETE',
'GET', 'HEAD', 'LINK',
'LOCK', 'M-SEARCH', 'MERGE',
'MKACTIVITY', 'MKCALENDAR', 'MKCOL',
'MOVE', 'NOTIFY', 'OPTIONS',
'PATCH', 'POST', 'PROPFIND',
'PROPPATCH', 'PURGE', 'PUT',
'REBIND', 'REPORT', 'SEARCH',
'SOURCE', 'SUBSCRIBE', 'TRACE',
'UNBIND', 'UNLINK', 'UNLOCK',
'UNSUBSCRIBE'
],
STATUS_CODES: {
'100': 'Continue',
'101': 'Switching Protocols',
'102': 'Processing',
'103': 'Early Hints',
'200': 'OK',
'201': 'Created',
'202': 'Accepted',
'203': 'Non-Authoritative Information',
'204': 'No Content',
'205': 'Reset Content',
'206': 'Partial Content',
'207': 'Multi-Status',
'208': 'Already Reported',
'226': 'IM Used',
'300': 'Multiple Choices',
'301': 'Moved Permanently',
'302': 'Found',
'303': 'See Other',
'304': 'Not Modified',
'305': 'Use Proxy',
'307': 'Temporary Redirect',
'308': 'Permanent Redirect',
'400': 'Bad Request',
'401': 'Unauthorized',
'402': 'Payment Required',
'403': 'Forbidden',
'404': 'Not Found',
'405': 'Method Not Allowed',
'406': 'Not Acceptable',
'407': 'Proxy Authentication Required',
'408': 'Request Timeout',
'409': 'Conflict',
'410': 'Gone',
'411': 'Length Required',
'412': 'Precondition Failed',
'413': 'Payload Too Large',
'414': 'URI Too Long',
'415': 'Unsupported Media Type',
'416': 'Range Not Satisfiable',
'417': 'Expectation Failed',
'418': "I'm a Teapot",
'421': 'Misdirected Request',
'422': 'Unprocessable Entity',
'423': 'Locked',
'424': 'Failed Dependency',
'425': 'Too Early',
'426': 'Upgrade Required',
'428': 'Precondition Required',
'429': 'Too Many Requests',
'431': 'Request Header Fields Too Large',
'451': 'Unavailable For Legal Reasons',
'500': 'Internal Server Error',
'501': 'Not Implemented',
'502': 'Bad Gateway',
'503': 'Service Unavailable',
'504': 'Gateway Timeout',
'505': 'HTTP Version Not Supported',
'506': 'Variant Also Negotiates',
'507': 'Insufficient Storage',
'508': 'Loop Detected',
'509': 'Bandwidth Limit Exceeded',
'510': 'Not Extended',
'511': 'Network Authentication Required'
},
Agent: [Function: Agent] { defaultMaxSockets: Infinity },
ClientRequest: [Function: ClientRequest],
IncomingMessage: [Function: IncomingMessage],
OutgoingMessage: [Function: OutgoingMessage],
Server: [Function: Server],
ServerResponse: [Function: ServerResponse],
createServer: [Function: createServer],
validateHeaderName: [Function: __node_internal_],
validateHeaderValue: [Function: __node_internal_],
get: [Function: get],
request: [Function: request],
setMaxIdleHTTPParsers: [Function: setMaxIdleHTTPParsers],
maxHeaderSize: [Getter],
globalAgent: [Getter/Setter]
},
'https:': {
Agent: [Function: Agent],
globalAgent: Agent {
_events: [Object: null prototype],
_eventsCount: 2,
_maxListeners: undefined,
defaultPort: 443,
protocol: 'https:',
options: [Object: null prototype],
requests: [Object: null prototype] {},
sockets: [Object: null prototype],
freeSockets: [Object: null prototype] {},
keepAliveMsecs: 1000,
keepAlive: false,
maxSockets: Infinity,
maxFreeSockets: 256,
scheduling: 'lifo',
maxTotalSockets: Infinity,
totalSocketCount: 1,
maxCachedSessions: 100,
_sessionCache: [Object],
[Symbol(kCapture)]: false
},
Server: [Function: Server],
createServer: [Function: createServer],
get: [Function: get],
request: [Function: request]
}
},
pathname: '/ArticleList.nhn',
search: '?search.clubid=10050146&search.menuid=385&search.boardType=L&userDisplay=10'
},
_ended: true,
_ending: true,
_redirectCount: 0,
_redirects: [],
_requestBodyLength: 0,
_requestBodyBuffers: [],
_onNativeResponse: [Function (anonymous)],
_currentRequest: <ref *1> ClientRequest {
_events: [Object: null prototype] {
response: [Function: bound onceWrapper] {
listener: [Function (anonymous)]
},
abort: [Function (anonymous)],
aborted: [Function (anonymous)],
connect: [Function (anonymous)],
error: [Function (anonymous)],
socket: [Function (anonymous)],
timeout: [Function (anonymous)]
},
_eventsCount: 7,
_maxListeners: undefined,
outputData: [],
outputSize: 0,
writable: true,
destroyed: false,
_last: true,
chunkedEncoding: false,
shouldKeepAlive: false,
maxRequestsOnConnectionReached: false,
_defaultKeepAlive: true,
useChunkedEncodingByDefault: false,
sendDate: false,
_removedConnection: false,
_removedContLen: false,
_removedTE: false,
strictContentLength: false,
_contentLength: 0,
_hasBody: true,
_trailer: '',
finished: true,
_headerSent: true,
_closed: false,
socket: <ref *2> TLSSocket {
_tlsOptions: {
allowHalfOpen: undefined,
pipe: false,
secureContext: SecureContext { context: SecureContext {} },
isServer: false,
requestCert: true,
rejectUnauthorized: true,
session: undefined,
ALPNProtocols: undefined,
requestOCSP: undefined,
enableTrace: undefined,
pskCallback: undefined,
highWaterMark: undefined,
onread: undefined,
signal: undefined
},
_secureEstablished: false,
_securePending: false,
_newSessionPending: false,
_controlReleased: true,
secureConnecting: true,
_SNICallback: null,
servername: null,
alpnProtocol: null,
authorized: false,
authorizationError: null,
encrypted: true,
_events: [Object: null prototype] {
close: [
[Function: onSocketCloseDestroySSL],
[Function],
[Function: onClose],
[Function: socketCloseListener]
],
end: [ [Function: onConnectEnd], [Function: onReadableStreamEnd] ],
newListener: [Function: keylogNewListener],
secure: [Function: onConnectSecure],
session: [Function (anonymous)],
free: [Function: onFree],
timeout: [Function: onTimeout],
agentRemove: [Function: onRemove],
error: [Function: socketErrorListener],
drain: [Function: ondrain]
},
_eventsCount: 10,
connecting: false,
_hadError: true,
_parent: null,
_host: 'cafe.naver.com',
_closeAfterHandlingError: false,
_readableState: ReadableState {
objectMode: false,
highWaterMark: 16384,
buffer: BufferList { head: null, tail: null, length: 0 },
length: 0,
pipes: [],
flowing: true,
ended: false,
endEmitted: false,
reading: true,
constructed: true,
sync: false,
needReadable: true,
emittedReadable: false,
readableListening: false,
resumeScheduled: false,
errorEmitted: true,
emitClose: false,
autoDestroy: true,
destroyed: true,
errored: Error: read ECONNRESET
at TLSWrap.onStreamRead (node:internal/stream_base_commons:217:20) {
errno: -104,
code: 'ECONNRESET',
syscall: 'read'
},
closed: true,
closeEmitted: true,
defaultEncoding: 'utf8',
awaitDrainWriters: null,
multiAwaitDrain: false,
readingMore: false,
dataEmitted: false,
decoder: null,
encoding: null,
[Symbol(kPaused)]: false
},
_maxListeners: undefined,
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: false,
needDrain: false,
ending: false,
ended: false,
finished: false,
destroyed: true,
decodeStrings: false,
defaultEncoding: 'utf8',
length: 253,
writing: true,
corked: 0,
sync: false,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: [Function: bound onFinish],
writelen: 253,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 1,
constructed: true,
prefinished: false,
errorEmitted: true,
emitClose: false,
autoDestroy: true,
errored: Error: read ECONNRESET
at TLSWrap.onStreamRead (node:internal/stream_base_commons:217:20) {
errno: -104,
code: 'ECONNRESET',
syscall: 'read'
},
closed: true,
closeEmitted: true,
[Symbol(kOnFinished)]: []
},
allowHalfOpen: false,
_sockname: null,
_pendingData: null,
_pendingEncoding: '',
server: undefined,
_server: null,
ssl: null,
_requestCert: true,
_rejectUnauthorized: true,
parser: null,
_httpMessage: [Circular *1],
[Symbol(res)]: TLSWrap {
_parent: TCP {
reading: [Getter/Setter],
onconnection: null,
[Symbol(owner_symbol)]: [Circular *2],
[Symbol(handle_onclose)]: [Function: done]
},
_parentWrap: undefined,
_secureContext: SecureContext { context: SecureContext {} },
reading: true,
onkeylog: [Function: onkeylog],
onhandshakestart: {},
onhandshakedone: [Function (anonymous)],
onocspresponse: [Function: onocspresponse],
onnewsession: [Function: onnewsessionclient],
onerror: [Function: onerror],
[Symbol(owner_symbol)]: [Circular *2]
},
[Symbol(verified)]: false,
[Symbol(pendingSession)]: null,
[Symbol(async_id_symbol)]: 18,
[Symbol(kHandle)]: null,
[Symbol(lastWriteQueueSize)]: 253,
[Symbol(timeout)]: null,
[Symbol(kBuffer)]: null,
[Symbol(kBufferCb)]: null,
[Symbol(kBufferGen)]: null,
[Symbol(kCapture)]: false,
[Symbol(kSetNoDelay)]: false,
[Symbol(kSetKeepAlive)]: true,
[Symbol(kSetKeepAliveInitialDelay)]: 60,
[Symbol(kBytesRead)]: 0,
[Symbol(kBytesWritten)]: 253,
[Symbol(connect-options)]: {
rejectUnauthorized: true,
ciphers: 'TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:TLS_AES_128_GCM_SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:DHE-RSA-AES256-SHA384:ECDHE-RSA-AES256-SHA256:DHE-RSA-AES256-SHA256:HIGH:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!MD5:!PSK:!SRP:!CAMELLIA',
checkServerIdentity: [Function: checkServerIdentity],
minDHSize: 1024,
maxRedirects: 21,
maxBodyLength: Infinity,
protocol: 'https:',
path: null,
method: 'GET',
headers: [Object: null prototype] {
Accept: 'application/json, text/plain, */*',
'User-Agent': 'axios/1.1.3',
'Accept-Encoding': 'gzip, deflate, br'
},
agents: { http: undefined, https: undefined },
auth: undefined,
beforeRedirect: [Function: dispatchBeforeRedirect],
beforeRedirects: { proxy: [Function: beforeRedirect] },
hostname: 'cafe.naver.com',
port: 443,
agent: undefined,
nativeProtocols: { 'http:': [Object], 'https:': [Object] },
pathname: '/ArticleList.nhn',
search: '?search.clubid=10050146&search.menuid=385&search.boardType=L&userDisplay=10',
_defaultAgent: Agent {
_events: [Object: null prototype],
_eventsCount: 2,
_maxListeners: undefined,
defaultPort: 443,
protocol: 'https:',
options: [Object: null prototype],
requests: [Object: null prototype] {},
sockets: [Object: null prototype],
freeSockets: [Object: null prototype] {},
keepAliveMsecs: 1000,
keepAlive: false,
maxSockets: Infinity,
maxFreeSockets: 256,
scheduling: 'lifo',
maxTotalSockets: Infinity,
totalSocketCount: 1,
maxCachedSessions: 100,
_sessionCache: [Object],
[Symbol(kCapture)]: false
},
host: 'cafe.naver.com',
noDelay: true,
servername: 'cafe.naver.com',
_agentKey: 'cafe.naver.com:443:::::::::::::::::::::',
encoding: null,
singleUse: true
}
},
_header: 'GET /ArticleList.nhn?search.clubid=10050146&search.menuid=385&search.boardType=L&userDisplay=10 HTTP/1.1\r\n' +
'Accept: application/json, text/plain, */*\r\n' +
'User-Agent: axios/1.1.3\r\n' +
'Accept-Encoding: gzip, deflate, br\r\n' +
'Host: cafe.naver.com\r\n' +
'Connection: close\r\n' +
'\r\n',
_keepAliveTimeout: 0,
_onPendingData: [Function: nop],
agent: Agent {
_events: [Object: null prototype] {
free: [Function (anonymous)],
newListener: [Function: maybeEnableKeylog]
},
_eventsCount: 2,
_maxListeners: undefined,
defaultPort: 443,
protocol: 'https:',
options: [Object: null prototype] { noDelay: true, path: null },
requests: [Object: null prototype] {},
sockets: [Object: null prototype] {
'cafe.naver.com:443:::::::::::::::::::::': [ [TLSSocket] ]
},
freeSockets: [Object: null prototype] {},
keepAliveMsecs: 1000,
keepAlive: false,
maxSockets: Infinity,
maxFreeSockets: 256,
scheduling: 'lifo',
maxTotalSockets: Infinity,
totalSocketCount: 1,
maxCachedSessions: 100,
_sessionCache: { map: {}, list: [] },
[Symbol(kCapture)]: false
},
socketPath: undefined,
method: 'GET',
maxHeaderSize: undefined,
insecureHTTPParser: undefined,
path: '/ArticleList.nhn?search.clubid=10050146&search.menuid=385&search.boardType=L&userDisplay=10',
_ended: false,
res: null,
aborted: false,
timeoutCb: null,
upgradeOrConnect: false,
parser: null,
maxHeadersCount: null,
reusedSocket: false,
host: 'cafe.naver.com',
protocol: 'https:',
_redirectable: [Circular *3],
[Symbol(kCapture)]: false,
[Symbol(kBytesWritten)]: 0,
[Symbol(kEndCalled)]: true,
[Symbol(kNeedDrain)]: false,
[Symbol(corked)]: 0,
[Symbol(kOutHeaders)]: [Object: null prototype] {
accept: [ 'Accept', 'application/json, text/plain, */*' ],
'user-agent': [ 'User-Agent', 'axios/1.1.3' ],
'accept-encoding': [ 'Accept-Encoding', 'gzip, deflate, br' ],
host: [ 'Host', 'cafe.naver.com' ]
},
[Symbol(kUniqueHeaders)]: null
},
_currentUrl: 'https://cafe.naver.com/ArticleList.nhn?search.clubid=10050146&search.menuid=385&search.boardType=L&userDisplay=10',
[Symbol(kCapture)]: false
},
cause: Error: read ECONNRESET
at TLSWrap.onStreamRead (node:internal/stream_base_commons:217:20) {
errno: -104,
code: 'ECONNRESET',
syscall: 'read'
}
}
It just feels like the client loses the connection to the server via proxy.
Here's the things I've tried:
- adding
connection: "keep-alive"
option tocreateProxyMiddleware
header - switching http request client to built in
fetch
fromaxios
- trying full length(?) url instead of shortified version (
/api/naver/cafe/${props.menu.id}
→http://localhost:3000/api/naver/cafe/${props.menu.id}
)
and nothing really resolved the issue.
The only way to fix this error is either by refreshing the page until it works or restarting the server.
I do have some Twitch's helix related apis as well, but this error only occurs with the crawling function above.
Literally spent hours and hours to troubleshoot this... and failed.
Any ideas, please?
I am using NodeJS v18.10.0 btw.