I know this is a very old question, but I went through the same dilemma and it took me a lot of energy to figure out a solution. This answer is to make the next person's life a little easier. All of the following was tested on Red Hat and Debian.
what you need
the idea
The Java Hadoop implementation has built-in support for Kerberos, and it natively uses krb5 if the right environment variables are set; no extra magic is needed. Since libhdfs
uses the Java Hadoop library in the backend to access HDFS, if you have these variables set it should ride on the underlying native support to get your request authenticated.
The only thing that is needed now is the "ticket-granting ticket" (TGT) from the KDC (e.g. your Active Directory domain controller). For this to work, you will first need to create a keytab file with your password baked into it, and then use this keytab to generate the credential cache file and show it to the Java backend.
what do you do exactly
- set the environment variable; without this variable the hadoop lib won't know that it needs to do Kerberos authentication.
export KRB5CCNAME=/location/of/cachefile
- generate a keytab file with ktutil (I am assuming that your /etc/krb5.conf is configured correctly). Check with your domain admin for the correct encryption type.
ktutil
addent -password -p user.name -k 1 -e RC4-HMAC
wkt /location/of/keytabfile
- use the following "example" code to generate your cache file
#include <iostream>
#include <cstring>
#include <stdexcept>
#include <string>
#include <krb5/krb5.h>
// Obtain a fresh ticket-granting ticket for _principal from _keytab and
// store it into the credential cache _cache.
//
// Throws std::runtime_error on any Kerberos API failure (catchable by the
// caller's catch (std::exception&) handler). Returns true on success.
//
// Fixes vs. the original:
//  - the original returned a `success` flag that was never set to true,
//    so it always reported failure;
//  - krb5_cc_store_cred() takes a krb5_creds* (was passed by value);
//  - creds were leaked on the cc_initialize/store error paths;
//  - krb5_free_cred_contents() is the correct call for a stack-allocated
//    krb5_creds (krb5_free_creds would also free the struct itself).
bool renew(krb5_context &_ctx, krb5_keytab &_keytab, krb5_ccache &_cache, std::string &_principal)
{
    krb5_principal principal = NULL;
    krb5_creds creds;
    memset(&creds, 0, sizeof(creds));

    if (krb5_parse_name(_ctx, _principal.c_str(), &principal))
        throw std::runtime_error("cannot parse principal string");

    // Ask the KDC for initial credentials using the keytab (no password needed).
    if (krb5_get_init_creds_keytab(_ctx, &creds, principal, _keytab, 0, NULL, NULL))
    {
        krb5_free_principal(_ctx, principal);
        throw std::runtime_error("cannot initialize keytab credentials");
    }

    if (krb5_cc_initialize(_ctx, _cache, principal))
    {
        krb5_free_cred_contents(_ctx, &creds); // don't leak creds on this path
        krb5_free_principal(_ctx, principal);
        throw std::runtime_error("cannot initialize cache");
    }

    if (krb5_cc_store_cred(_ctx, _cache, &creds)) // API takes krb5_creds*
    {
        krb5_free_cred_contents(_ctx, &creds);
        krb5_free_principal(_ctx, principal);
        throw std::runtime_error("cannot store credentials");
    }

    krb5_free_cred_contents(_ctx, &creds);
    krb5_free_principal(_ctx, principal);
    return true;
}
// Driver: open the keytab and the credential cache, then call renew() to
// populate the cache with a fresh TGT.
//
// All failures are routed through the try/catch so that the krb5 handles
// are always released (the original threw bare C strings from main that
// escaped uncaught — std::terminate — and left an unused `bailout:` label).
// Returns 0 on success, 1 on failure.
int main()
{
    std::string _keytab_file = "/location/of/keytabfile";
    std::string _cache_file = "/location/of/cachefile";
    std::string _principal = "user.name@DOMAIN.TLD"; // full principal incl. realm

    krb5_context _ctx = NULL;
    krb5_keytab _keytab = NULL;
    krb5_ccache _cache = NULL;
    int rc = 0;

    try
    {
        if (krb5_init_context(&_ctx))
            throw std::runtime_error("cannot initialize context");
        if (krb5_kt_resolve(_ctx, _keytab_file.c_str(), &_keytab))
            throw std::runtime_error("cannot resolve keytab");
        if (krb5_cc_resolve(_ctx, _cache_file.c_str(), &_cache))
            throw std::runtime_error("cannot open/initialize kerberos cache");
        renew(_ctx, _keytab, _cache, _principal);
    }
    catch (const std::exception &e)
    {
        std::cerr << e.what() << std::endl;
        rc = 1;
    }
    catch (const char *msg) // tolerate C-string throws (original renew())
    {
        std::cerr << msg << std::endl;
        rc = 1;
    }

    // Cleanup runs on every path (replaces the dead `bailout:` label).
    if (_cache)
        krb5_cc_close(_ctx, _cache);
    if (_keytab)
        krb5_kt_close(_ctx, _keytab);
    if (_ctx)
        krb5_free_context(_ctx);
    return rc;
}
- use the following "example" code to connect to your HDFS and iterate over a directory's contents
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <linux/limits.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/stat.h>
#include <hdfs.h>
/*
 * Connect to a Kerberos-secured HDFS namenode using libhdfs and list the
 * contents of a directory.
 *
 * Fixes vs. the original:
 *  - hdfsBuilderSetKerbTicketCachePath() must be given the credential
 *    CACHE produced by the krb5 program above (the file KRB5CCNAME points
 *    to), not the keytab file;
 *  - hdfsBuilderConnect() consumes the builder, so the trailing
 *    `if (bld) hdfsFreeBuilder(bld)` was dead code and is removed;
 *  - the filesystem handle is now disconnected;
 *  - mSize (tOffset, a 64-bit type) is cast explicitly for printf;
 *  - errors return a non-zero exit code; unused writePath removed.
 */
int main(int argc, char **argv)
{
    const char *listPath = "/tmp";
    char kpath[PATH_MAX];
    struct hdfsBuilder *bld = NULL;
    hdfsFS fs = NULL;
    int rc = 0;

    bld = hdfsNewBuilder();
    if (!bld)
    {
        fprintf(stderr, "Error allocating new builder!\n");
        return 1;
    }

    hdfsBuilderSetForceNewInstance(bld);
    hdfsBuilderSetNameNode(bld, "hdfs://hdfs.domain.tld");
    hdfsBuilderSetNameNodePort(bld, 8020);
    /* Point libhdfs at the credential cache generated earlier -- the same
     * file that KRB5CCNAME refers to -- NOT the keytab file. */
    strcpy(kpath, "/location/of/cachefile");
    hdfsBuilderSetKerbTicketCachePath(bld, kpath);
    hdfsBuilderSetUserName(bld, "user.name"); /* I believe this is optional */

    /* hdfsBuilderConnect() frees the builder (even on failure), so it must
     * not be used or freed after this call. */
    fs = hdfsBuilderConnect(bld);
    bld = NULL;
    if (!fs)
    {
        fprintf(stderr, "connect issues!\n");
        return 1;
    }

    int cnt = 0;
    hdfsFileInfo *hdfsList = hdfsListDirectory(fs, listPath, &cnt);
    if (hdfsList)
    {
        fprintf(stderr, "listDirectory: %d\n", cnt);
        for (int i = 0; i < cnt; ++i)
        {
            printf("mName: %s, mKind: %c, Size: %lld\n",
                   hdfsList[i].mName, hdfsList[i].mKind,
                   (long long)hdfsList[i].mSize);
        }
        hdfsFreeFileInfo(hdfsList, cnt);
    }
    else
    {
        fprintf(stderr, "hdfsListDirectory failed for %s\n", listPath);
        rc = 1;
    }

    hdfsDisconnect(fs);
    return rc;
}
Hope this helps.