0

I am trying to use the Google BigQuery SDK to run a query. It works fine on my local machine, but on a VM it times out every time. My best guess is that the proxy settings are the problem. I have tried setting them in the following way, but the SDK client doesn't seem to pick them up:

    // http.proxyHost / https.proxyHost take a bare host name or IP address, not a URL:
    // the "http://" scheme prefix must be left out, and the port goes in the *.proxyPort property.
    System.setProperty("java.net.useSystemProxies", "true");
    System.setProperty("http.proxyHost", "xxxxx");
    System.setProperty("http.proxyPort", "3128");
    System.setProperty("https.proxyHost", "xxxxx");
    System.setProperty("https.proxyPort", "3128");

Also via application.properties

   https.proxyHost=http://xxxxx
   https.proxyPort=3128 

Here is my code. Is there any other way to set the proxy, or a way to debug this further? I have printed the Job and compared it between local and the VM; both look the same.

```java
/**
 * Runs {@code SELECT * FROM user} as a BigQuery job through the configured HTTP(S) proxy,
 * prints every result row to standard output and logs the number of rows received.
 *
 * <p>Failures (missing credentials file, job errors, I/O errors) are logged rather than
 * propagated, so the method always returns.
 *
 * @return the {@code arrayList} field of the enclosing class
 */
public List<Data> getData() {
    // The proxy host properties take a bare host name (no "http://" scheme); the port is
    // configured separately through the matching *.proxyPort property.
    System.setProperty("java.net.useSystemProxies", "true");
    System.setProperty("http.proxyHost", "xxxx");
    System.setProperty("http.proxyPort", "3128");
    System.setProperty("https.proxyHost", "xxxxxx");
    System.setProperty("https.proxyPort", "3128");
    String query = "SELECT * FROM user";
    log.info("query --  " + query);
    // Whole-number row counter (a floating-point counter would log as e.g. "5.0").
    long rowCount = 0;
    try {
        File file = new File(FILE_PATH);
        if (!file.exists()) {
            throw new FileNotFoundException("Google credentials file not found ");
        }

        // Close the key file as soon as the credentials have been parsed.
        Credentials credentials;
        try (InputStream credentialsStream = new FileInputStream(file)) {
            credentials = GoogleCredentials.fromStream(credentialsStream);
        }
        BigQuery bigquery =
            BigQueryOptions.newBuilder()
                .setCredentials(credentials)
                .build()
                .getService();
        QueryJobConfiguration queryConfig =
            QueryJobConfiguration.newBuilder(query)
                // Use standard SQL syntax for queries.
                // See: https://cloud.google.com/bigquery/sql-reference/
                .setUseLegacySql(false)
                .build();
        // Create a job ID so that we can safely retry.
        JobId jobId = JobId.of(UUID.randomUUID().toString());
        Job queryJob =
            bigquery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build());
        // Wait for the query to complete.
        queryJob = queryJob.waitFor();

        // Check for errors
        if (queryJob == null) {
            throw new RuntimeException("Job no longer exists");
        } else if (queryJob.getStatus().getError() != null) {
            // You can also look at queryJob.getStatus().getExecutionErrors() for all
            // errors, not just the latest one.
            throw new RuntimeException(queryJob.getStatus().getError().toString());
        }
        // Get the results.
        TableResult result = queryJob.getQueryResults();
        // Print all pages of the results.
        for (FieldValueList row : result.iterateAll()) {
            rowCount++;
            System.out.println(row);
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        log.error("Interrupted while waiting for the BigQuery job", e);
    } catch (Exception e) {
        log.error("BigQuery query failed", e);
    }
    log.info(" data received ==" + rowCount + " == ");
    return arrayList;
}
```

And the error log

      at com.google.cloud.bigquery.spi.v2.HttpBigQueryRpc.translate(HttpBigQueryRpc.java:106)
      at com.google.cloud.bigquery.spi.v2.HttpBigQueryRpc.create(HttpBigQueryRpc.java:206)
      at com.google.cloud.bigquery.BigQueryImpl$5.call(BigQueryImpl.java:324)
      at com.google.cloud.bigquery.BigQueryImpl$5.call(BigQueryImpl.java:321)
      at com.google.api.gax.retrying.DirectRetryingExecutor.submit(DirectRetryingExecutor.java:105)
      at com.google.cloud.RetryHelper.run(RetryHelper.java:76)
      at com.google.cloud.RetryHelper.runWithRetries(RetryHelper.java:50)
      at com.google.cloud.bigquery.BigQueryImpl.create(BigQueryImpl.java:320)
      at com.google.cloud.bigquery.BigQueryImpl.create(BigQueryImpl.java:295)
      at uk.ac.ebi.usage.portal.tscusage.cloud.GoogleCloud.FetchData.getData(FetchData.java:138)
      at uk.ac.ebi.usage.portal.tscusage.cloud.GoogleCloud.GoogleCloudService.execute(GoogleCloudService.java:39)
      at uk.ac.ebi.usage.portal.tscusage.TscUsageApplication.lambda$commandLineRunnerAllService$0(TscUsageApplication.java:162)
      at org.springframework.boot.SpringApplication.callRunner(SpringApplication.java:732)
      at org.springframework.boot.SpringApplication.callRunners(SpringApplication.java:716)
      at org.springframework.boot.SpringApplication.afterRefresh(SpringApplication.java:703)
      at org.springframework.boot.SpringApplication.run(SpringApplication.java:304)
      at org.springframework.boot.SpringApplication.run(SpringApplication.java:1118)
      at org.springframework.boot.SpringApplication.run(SpringApplication.java:1107)
      at uk.ac.ebi.usage.portal.tscusage.TscUsageApplication.main(TscUsageApplication.java:89)
      at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
      at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
      at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
      at java.lang.reflect.Method.invoke(Method.java:498)
      at org.springframework.boot.loader.MainMethodRunner.run(MainMethodRunner.java:48)
      at org.springframework.boot.loader.Launcher.launch(Launcher.java:87)
      at org.springframework.boot.loader.Launcher.launch(Launcher.java:50)
      at org.springframework.boot.loader.JarLauncher.main(JarLauncher.java:51)
  Caused by: java.io.IOException: Error getting access token for service account: connect timed out
      at com.google.auth.oauth2.ServiceAccountCredentials.refreshAccessToken(ServiceAccountCredentials.java:444)
      at com.google.auth.oauth2.OAuth2Credentials.refresh(OAuth2Credentials.java:157)
      at com.google.auth.oauth2.OAuth2Credentials.getRequestMetadata(OAuth2Credentials.java:145)
      at com.google.auth.oauth2.ServiceAccountCredentials.getRequestMetadata(ServiceAccountCredentials.java:603)
      at com.google.auth.http.HttpCredentialsAdapter.initialize(HttpCredentialsAdapter.java:91)
      at com.google.cloud.http.HttpTransportOptions$1.initialize(HttpTransportOptions.java:159)
      at com.google.api.client.http.HttpRequestFactory.buildRequest(HttpRequestFactory.java:88)
      at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.buildHttpRequest(AbstractGoogleClientRequest.java:422)
      at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:541)
      at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:474)
      at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.execute(AbstractGoogleClientRequest.java:591)
      at com.google.cloud.bigquery.spi.v2.HttpBigQueryRpc.create(HttpBigQueryRpc.java:204)
      ... 25 more
  Caused by: java.net.SocketTimeoutException: connect timed out
      at java.net.PlainSocketImpl.socketConnect(Native Method)
      at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
      at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
      at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
      at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
      at java.net.Socket.connect(Socket.java:589)
      at sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:668)
      at sun.net.NetworkClient.doConnect(NetworkClient.java:175)
      at sun.net.www.http.HttpClient.openServer(HttpClient.java:432)
      at sun.net.www.http.HttpClient.openServer(HttpClient.java:527)
      at sun.net.www.protocol.https.HttpsClient.<init>(HttpsClient.java:264)
      at sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:367)
      at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:200)
      at sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1124)
      at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:999)
      at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:177)
      at sun.net.www.protocol.http.HttpURLConnection.getOutputStream0(HttpURLConnection.java:1283)
      at sun.net.www.protocol.http.HttpURLConnection.getOutputStream(HttpURLConnection.java:1258)
      at sun.net.www.protocol.https.HttpsURLConnectionImpl.getOutputStream(HttpsURLConnectionImpl.java:250)
      at com.google.api.client.http.javanet.NetHttpRequest.execute(NetHttpRequest.java:113)
      at com.google.api.client.http.javanet.NetHttpRequest.execute(NetHttpRequest.java:84)
      at com.google.api.client.http.HttpRequest.execute(HttpRequest.java:1012)
      at com.google.auth.oauth2.ServiceAccountCredentials.refreshAccessToken(ServiceAccountCredentials.java:441)
      ... 36 more

I have tried with cURL, and it is able to connect:

    curl -v "https://console.cloud.google.com/bigquery?project=xxxxxx&page=queryresults"

    About to connect() to proxy xxxproxyxxxx port 3128 (#0)
    Trying 10.7.48.45... connected
    Connected to xxxproxyxxxx (10.7.48.45) port 3128 (#0)
    Establish HTTP proxy tunnel to console.cloud.google.com:443
    CONNECT console.cloud.google.com:443 HTTP/1.1
    Host: console.cloud.google.com:443
    User-Agent: curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 
    NSS/3.27.1 zlib/1.2.3 libidn/1.18 libssh2/1.4.2
    Proxy-Connection: Keep-Alive
    HTTP/1.1 200 Connection established
    Proxy replied OK to CONNECT request
    Initializing NSS with certpath: sql:/etc/pki/nssdb
    CAfile: /etc/pki/tls/certs/ca-bundle.crt
    CApath: none
    SSL connection using TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256
    Server certificate:
    subject: CN=*.googlecode.com,O=Google LLC,L=Mountain 
    View,ST=California,C=US
    start date: Jul 07 08:08:04 2020 GMT
    expire date: Sep 29 08:08:04 2020 GMT
    common name: *.googlecode.com
    issuer: CN=GTS CA 1O1,O=Google Trust Services,C=US
    GET /bigquery?project=xxxxxx&page=queryresults HTTP/1.1
    User-Agent: curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 
   NSS/3.27.1 zlib/1.2.3 libidn/1.18 libssh2/1.4.2
   Host: console.cloud.google.com
   Accept: */*
   HTTP/1.1 302 Found
   Content-Type: application/binary
   X-Frame-Options: DENY
  Location: https://accounts.google.com/ServiceLogin? 
 service=cloudconsole&passive=1209600&osid=1&continue=https://console.cloud.google.com/bigquery?project%3Dxxxx%3Dqueryresults&followup=https://console.cloud.google.com/bigquery?project%3Dxxxx%26j%3Dbq:EU:bquxjob_1207bd95_1736ccef796%26page%3Dqueryresults
  x-debug-tracking-id: 1864997233354316760;o=0
   P3P: CP="This is not a P3P policy! See g.co/p3phelp for more info."
  Strict-Transport-Security: max-age=31536000
< Date: Thu, 23 Jul 2020 13:32:26 GMT
< Server: ESF
< Content-Length: 0
< X-XSS-Protection: 0
< X-Content-Type-Options: nosniff
< Set-Cookie: NID=204=fzP-6QVkBMPOJ5GhTaskE4leuqZr0UHIUrAUNd1dx4VUQV-eIEMNHrtjt1QCJezFtivTAzPOiY2Fmd3Kegm2KMaWj-hLoIJsG37XgAQXEsghxWZ_aJnZblf6Ro__v9kn0jTxxfIr0PffUMvpCLV01gj9eWwyOUKGupGvqx6_OpA; expires=Fri, 22-Jan-2021 13:32:26 GMT; path=/; domain=.google.com; HttpOnly
  Alt-Svc: h3-29=":443"; ma=2592000,h3-27=":443"; ma=2592000,h3-T050=":443"; ma=2592000,h3-Q050=":443"; ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; ma=2592000; v="46,43"
  
  Connection #0 to host xxxproxyxxxx left intact
  Closing connection #0

Thanks for the HELP

Andie Vanille
  • 820
  • 5
  • 14
Pritbh
  • 120
  • 1
  • 12
  • Looks like a firewall blocking your egress traffic. Can you SSH onto the VM and check outbound connectivity e.g. using cURL or something? – Graham Polley Jul 28 '20 at 13:30
  • Hi @GrahamPolley I have updated the question thanks – Pritbh Jul 28 '20 at 14:19
  • So, cURL directly from the VM worked? – Graham Polley Jul 29 '20 at 00:43
  • yup it does but it goes via proxy .. so I am wondering if there is something getting dropped around proxy @GrahamPolley – Pritbh Jul 29 '20 at 09:36
  • I am wondering if there is a way that can check if the proxy is actually being used by Google client ?? .. I know in java that can be InetSocketAddress addr = (InetSocketAddress) proxy.address() and I have done it it does print out proxy hostname and port correctly – Pritbh Jul 29 '20 at 10:35
  • Based on the provided information, it looks like you are trying to access Bigquery from an external to GCP source. However, I don't see any sign of the service account credentials which are required to access Bigquery. Additionally, as per GCP documentation[1], would you mind also confirming if you have created a service account using your credentials and passed them along to your Java code (based on this doc[1]) so that you can access bigquery. – Dattu Pragnu Nellutla Aug 05 '20 at 20:12
  • @GrahamPolley Note that the curl test was run against `https://console.cloud.google.com/bigquery`, but the code in the Java library calls `https://bigquery.googleapis.com/bigquery/v2/`. – Jofre Aug 20 '20 at 07:28
  • Also, permission errors return a 403, not a connection timeout, so I don't think this is related to IAM permissions as @Dattu suggested. This still looks a lot like a network configuration error. – Jofre Aug 20 '20 at 07:39
  • @pritbh I have got hit with the same problem, any suggestion?? – Susanta Adhikary Jul 05 '21 at 10:19

2 Answers

0

You have to supply your own ApacheHttpTransport, built on an HTTP client that carries the proxy settings (and, if needed, a custom proxy header). I was able to solve the issue with the steps below.

I was using it for querying bigquery service:

// Routes both the OAuth token request and the BigQuery API calls through an HTTP proxy by
// handing the same proxy-aware Apache transport to the credentials and to the BigQuery client.
// proxyHost (bare host name or IP, no scheme), proxyPort, credentialsPath, projectId, query,
// credentials and queryJob are expected to be declared by the surrounding code.
HttpHost proxy = new HttpHost(proxyHost, proxyPort);

DefaultHttpClient httpClient = new DefaultHttpClient();
httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);

// Adds proxy authentication to the CONNECT request that opens the HTTPS tunnel.
httpClient.addRequestInterceptor(new HttpRequestInterceptor() {
    @Override
    public void process(org.apache.http.HttpRequest request, HttpContext context) throws HttpException, IOException {
        if (request.getRequestLine().getMethod().equals("CONNECT")) {
            // Sample value: Base64 of "user1:user1". Replace it with the real proxy
            // credentials, loaded from configuration rather than hard-coded.
            request.addHeader(new BasicHeader("Proxy-Authorization", "Basic dXNlcjE6dXNlcjE="));
        }
    }
});

final ApacheHttpTransport mHttpTransport = new ApacheHttpTransport(httpClient);

// Every caller receives the same proxy-aware transport instance.
HttpTransportFactory hf = new HttpTransportFactory() {
    @Override
    public HttpTransport create() {
        return mHttpTransport;
    }
};

// Close the key file once the credentials have been parsed.
try (FileInputStream serviceAccountStream = new FileInputStream(credentialsPath)) {
    credentials = ServiceAccountCredentials.fromStream(serviceAccountStream, hf);
}
TransportOptions options = HttpTransportOptions.newBuilder().setHttpTransportFactory(hf).build();

if (credentials.createScopedRequired()) {
    Collection<String> bigqueryScopes = BigqueryScopes.all();
    credentials = credentials.createScoped(bigqueryScopes);
}

BigQuery bigquery = BigQueryOptions
        .newBuilder()
        .setCredentials(credentials)
        .setProjectId(projectId)
        .setTransportOptions(options)
        .build()
        .getService();

QueryJobConfiguration queryConfig =
        QueryJobConfiguration.newBuilder(query)
                .setUseLegacySql(false)
                .build();

String jobName = "jobId_" + UUID.randomUUID().toString();
JobId jobId = JobId.newBuilder().setJob(jobName).build();
queryJob = bigquery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build());
queryJob = queryJob.waitFor();
marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
Susanta Adhikary
  • 257
  • 2
  • 6
  • 20
  • I was doing this to test the functionality locally with my own GCP account from my company k8s pod. Ideally, in a production scenario, any internet connection should be disallowed and one should use a dedicated interconnect – Susanta Adhikary Jul 15 '21 at 07:11
0

I think you can use the same approach that Google describes in its GitHub API repositories:

import java.io.IOException;

/** Example of obtaining Google credentials through an authenticating HTTP proxy. */
public class ProxyExample {
  /**
   * Loads the application default credentials, using a transport factory built for the
   * sample proxy {@code some-host:8080} with the sample user name and password.
   *
   * @return the application default credentials obtained with that transport factory
   * @throws IOException declared for {@code GoogleCredentials.getApplicationDefault}
   */
  public GoogleCredentials getCredentials() throws IOException {
    return GoogleCredentials.getApplicationDefault(
        getHttpTransportFactory("some-host", 8080, "some-username", "some-password"));
  }

  /**
   * Builds a transport factory whose transport sends requests via the given proxy and
   * answers the proxy's authentication challenge with the given user name and password.
   *
   * @param proxyHost host name of the proxy
   * @param proxyPort port of the proxy
   * @param proxyUsername user name presented to the proxy
   * @param proxyPassword password presented to the proxy
   * @return a factory that returns the same transport instance on every call
   */
  public HttpTransportFactory getHttpTransportFactory(String proxyHost, int proxyPort, String proxyUsername, String proxyPassword) {
    HttpHost proxy = new HttpHost(proxyHost, proxyPort);

    // Register the user name and password for exactly this proxy host and port.
    AuthScope proxyScope = new AuthScope(proxy.getHostName(), proxy.getPort());
    CredentialsProvider proxyCredentials = new BasicCredentialsProvider();
    proxyCredentials.setCredentials(
        proxyScope, new UsernamePasswordCredentials(proxyUsername, proxyPassword));

    HttpClient proxiedClient = ApacheHttpTransport.newDefaultHttpClientBuilder()
        .setDefaultCredentialsProvider(proxyCredentials)
        .setProxyAuthenticationStrategy(ProxyAuthenticationStrategy.INSTANCE)
        .setRoutePlanner(new DefaultProxyRoutePlanner(proxy))
        .build();

    // One transport is created up front and handed out on every create() call.
    final HttpTransport sharedTransport = new ApacheHttpTransport(proxiedClient);
    return () -> sharedTransport;
  }
}
Garrampa
  • 9
  • 1