8

'www.mysite.com/mySecretKey1' redirects to 'www.othersite.com/mySecretKey2'

In Google Apps Script:

  // Fetch the original URL; no options object is passed.
  var response = UrlFetchApp.fetch("https://www.mysite.com/mySecretKey1");
  // Serialize all response headers to a string so they can be logged for inspection.
  var headerString = response.getAllHeaders().toSource();
  Logger.log(headerString);
  // The string 'www.othersite.com/mySecretKey2' is not present in the log.

How would the script discover the URL address that it is redirected to (i.e. the string 'www.othersite.com/mySecretKey2')?

UPDATE: More generally, how would the script discover the URL address from response?

Rubén
  • 34,714
  • 9
  • 70
  • 166
user3645994
  • 409
  • 6
  • 13

4 Answers

10

Expounding on the answer by Joseph Combs, here's a version that uses recursion to follow multiple redirects, returning only the ultimate canonical URL:

/**
 * Follows HTTP redirects one hop at a time and returns the final (canonical) URL.
 *
 * Each hop is fetched with automatic redirect-following disabled so that the
 * 3xx response and its Location header can be inspected. Log lines are written
 * after the recursive call returns, so the canonical URL is logged first and
 * the starting URL last.
 *
 * @param {string} url Absolute URL to resolve.
 * @param {number=} maxRedirects Optional cap on the number of redirects to
 *     follow (defaults to 10). Guards against redirect loops.
 * @return {string} The first URL in the chain that does not redirect.
 * @throws {Error} If more than `maxRedirects` redirects are encountered, or if
 *     UrlFetchApp.fetch throws (HTTP exceptions are not muted).
 */
function getRedirect(url, maxRedirects) {
  // Callers that pass only `url` get the default cap.
  var hopsLeft = (typeof maxRedirects === 'number') ? maxRedirects : 10;
  var response = UrlFetchApp.fetch(url, {'followRedirects': false, 'muteHttpExceptions': false});
  var responseCode = response.getResponseCode();
  var location = findLocationHeader_(response.getHeaders());
  // A Location header only signals a redirect when it comes with a 3xx status.
  var isRedirect = Boolean(location) && responseCode >= 300 && responseCode < 400;

  if (!isRedirect) {                                   // end of the chain
    Logger.log(url + " is canonical. (" + responseCode + ")");
    return url;
  }
  if (hopsLeft <= 0) {                                 // redirect loop or overly long chain
    throw new Error("Too many redirects; last URL fetched: " + url);
  }

  // Location may be a relative reference; make it absolute before fetching it.
  var redirectUrl = resolveRedirectUrl_(url, location);
  var nextRedirectUrl = getRedirect(redirectUrl, hopsLeft - 1);
  Logger.log(url + " is redirecting to " + redirectUrl + ". (" + responseCode + ")");
  return nextRedirectUrl;
}

/**
 * Returns the value of the Location header, matching the header name
 * case-insensitively, or undefined when the header is absent.
 */
function findLocationHeader_(headers) {
  var names = Object.keys(headers || {});
  for (var i = 0; i < names.length; i++) {
    if (names[i].toLowerCase() === 'location') {
      return headers[names[i]];
    }
  }
  return undefined;
}

/**
 * Resolves a Location header value against the URL that produced it.
 * Handles absolute, protocol-relative, root-relative, query-only and
 * path-relative references. Dot segments ("../") are not normalized.
 */
function resolveRedirectUrl_(baseUrl, location) {
  if (/^[a-z][a-z0-9+.\-]*:/i.test(location)) {
    return location;                                   // already absolute
  }
  var parts = /^([a-z][a-z0-9+.\-]*:)\/\/([^\/?#]*)([^?#]*)/i.exec(baseUrl);
  if (!parts) {
    return location;                                   // base is not absolute; nothing to resolve against
  }
  var scheme = parts[1];
  var origin = scheme + '//' + parts[2];
  var path = parts[3];
  if (location.indexOf('//') === 0) {
    return scheme + location;                          // protocol-relative
  }
  if (location.charAt(0) === '/') {
    return origin + location;                          // root-relative
  }
  if (location.charAt(0) === '?') {
    return origin + (path || '/') + location;          // same path, new query
  }
  // Path-relative: replace everything after the last "/" of the base path.
  var directory = path.substring(0, path.lastIndexOf('/') + 1) || '/';
  return origin + directory + location;
}

/**
 * Manual smoke test: resolves the redirect chain starting at wikipedia.org
 * and logs the URL that getRedirect returns.
 */
function testGetRedirect() {
  var startUrl = "http://wikipedia.org";
  var finalUrl = getRedirect(startUrl);
  Logger.log("Returned: " + finalUrl);
}

This logs:

https://www.wikipedia.org/ is canonical. (200)
https://wikipedia.org/ is redirecting to https://www.wikipedia.org/. (301)
http://wikipedia.org is redirecting to https://wikipedia.org/. (301)
Returned: https://www.wikipedia.org/
Chris
  • 377
  • 4
  • 10
  • hi @Chris this doesn't work for google drive links like- https://drive.google.com/uc?id=1fu4kYrt10Lvx6UvqqR5Tw5IsfpX0PT1R . It doesn't return the final googleusercontent.com url. Any idea on how to achieve that? – Sam Oct 24 '22 at 13:45
4

UPDATE: More generally, how would the script discover the URL address from response?

Counterintuitively, you need to disable redirection and not mute HttpExceptions, like so:

// Fetch with automatic redirect-following turned off and HTTP exceptions left unmuted.
var followedPost = UrlFetchApp.fetch(properUrl, {'followRedirects': false, 'muteHttpExceptions': false});
// Log the 'Location' response header, which holds the new location of the requested resource.
Logger.log(followedPost.getHeaders()['Location']);

The object returned by .getHeaders() will contain the new location of the resource being requested. Access that new location with a new .fetch().

Joseph Combs
  • 909
  • 9
  • 12
1

There is native support in UrlFetchApp for following redirects. You should try setting:

followRedirects = true

in the options you provide to UrlFetchApp. Something like this:

// Ask UrlFetchApp to follow redirects automatically (this is also its default).
var options = {
   "followRedirects" : true
 };
// fetch() actually sends the request; getRequest() only returns a description
// of the request that would be made and never contacts the server.
var result = UrlFetchApp.fetch("http://your-url", options);
user1981275
  • 13,002
  • 8
  • 72
  • 101
Ido Green
  • 2,795
  • 1
  • 17
  • 26
0

Here is a Google Sheet that you can copy for free: https://www.thetechseo.com/seo-tools/redirect-checker/

It works beautifully, providing the hops, the status codes and the final destination.

enter image description here

Just in case the sheet/page gets lost, I am pasting here the code that goes inside the Script Editor (none of this code is mine).

/**
 * Follows a URL's redirect chain one hop at a time and reports what happened.
 *
 * @param {string} url Absolute URL to check.
 * @param {string=} user Optional HTTP Basic auth user name.
 * @param {string=} pwd Optional HTTP Basic auth password.
 * @return {string} "<status codes joined by ' > '>|<redirect count>|<last URL requested>",
 *     or "Error| |<error>" if anything throws along the way.
 */
function redirectCheck(url, user, pwd) {
  // Status codes that announce a redirect via a Location header.
  var redirectCodes = [301, 302, 303, 307, 308];

  // Sends one request without following redirects; HTTP error statuses are
  // returned as responses rather than thrown.
  function getResp(target) {
    var options = {
      muteHttpExceptions: true,
      followRedirects: false
    };
    // Attach credentials only when some were supplied, instead of sending a
    // bogus "undefined:undefined" pair.
    // NOTE(review): the same credentials go to every hop, including hops on
    // other hosts - confirm that this is acceptable for your targets.
    if (user || pwd) {
      options.headers = {
        'Authorization': 'Basic ' + Utilities.base64Encode((user || '') + ':' + (pwd || ''))
      };
    }
    return UrlFetchApp.fetch(target, options);
  }

  // Header names are case-insensitive, so do not rely on the exact spelling.
  function getLocationHeader(headers) {
    var names = Object.keys(headers || {});
    for (var i = 0; i < names.length; i++) {
      if (names[i].toLowerCase() === 'location') {
        return headers[names[i]];
      }
    }
    return undefined;
  }

  try {
    var response = getResp(url);
    var rCode = response.getResponseCode();
    var redirectCount = 0;
    var tCode = rCode.toString();
    var location = url;
    var domain = getDomain(url);

    // The hop cap applies to every redirect status. The original bound
    // (redirectCount <= 10) is kept, so at most 11 hops are followed.
    while (redirectCodes.indexOf(rCode) !== -1 && redirectCount <= 10) {
      var target = getLocationHeader(response.getHeaders());
      if (!target) {
        break; // redirect status without a Location header: nothing to follow
      }
      redirectCount++;
      location = getFullUrl(target, domain);
      domain = getDomain(location);
      Logger.log('location: '+location);
      response = getResp(location);
      rCode = response.getResponseCode();
      tCode = tCode + " > " + rCode.toString();
      Utilities.sleep(500);// pause in the loop for 500 milliseconds
    }

    Logger.log('redirectCount: '+redirectCount);
    return tCode + "|" + redirectCount + "|" + location;
  } catch (error) {
    Logger.log(error);
    return "Error| |"+error;
  }
}
/**
 * Returns the "<scheme>//<host>" prefix of an absolute URL, without the port.
 *
 * @param {string} url URL or relative reference to inspect.
 * @return {string} e.g. "https://example.com", or '' when `url` does not start
 *     with "<scheme>://" (i.e. it is a relative reference).
 */
function getDomain(url) {
  // Anchor the scheme at the start of the string: a "://" that merely appears
  // later on (for example inside a query string such as "/login?next=http://x")
  // must not make a relative reference look absolute. The host part stops at
  // the first "/", "?" or "#".
  var parts = url.match(/^([a-z][a-z0-9+.\-]*:)\/\/([^\/?#]*)/i);
  if (!parts) {
    return '';
  }
  var protocol = parts[1];
  //remove port number
  var host = parts[2].split(':')[0];
  //add protocol back
  return protocol + "//" + host;
}

/**
 * Turns a redirect target into an absolute URL.
 *
 * @param {string} url Redirect target; may be absolute, protocol-relative
 *     ("//host/path"), root-relative ("/path") or path-relative ("path").
 * @param {string} prevDom "<scheme>//<host>" of the URL that issued the
 *     redirect, as produced by getDomain.
 * @return {string} `url` unchanged when it is already absolute, otherwise
 *     `url` resolved against `prevDom`.
 */
function getFullUrl(url,prevDom) {
  // Protocol-relative target: reuse the scheme of the previous URL.
  if (url.indexOf('//') === 0) {
    return prevDom.split('//')[0] + url;
  }
  // Already absolute: nothing to do.
  if (getDomain(url) != '') {
    return url;
  }
  // Path-relative target: a "/" is needed between host and path. Only the host
  // of the previous URL is known here, so the path is resolved from the root.
  var first = url.charAt(0);
  if (url !== '' && first !== '/' && first !== '?' && first !== '#') {
    return prevDom + '/' + url;
  }
  return prevDom+url;
}

/**
 * Manual smoke test: runs redirectCheck against a sample blog URL (no
 * credentials) and logs the result string.
 */
function redirectCheckTest() {
  var sampleUrl = 'http://blog.pexcard.com/contractors/building-budget-construction-business/';
  var outcome = redirectCheck(sampleUrl);
  Logger.log('test: ' + outcome);
}

And the formulas

In G

=IF(H11=200,"Not Redirected",IF(ISBLANK(C11),"",if(C11=J11,"Good","Bad")))

In H

=IF(ISBLANK(B11),"",split(redirectCheck(B11,$L$5,$L$6),"|"))

enter image description here

It allows you not only to find issues (wrong codes), but to improve the quality of the links by replacing them with the final destination.

Rub
  • 2,071
  • 21
  • 37