0

Hi there Im working on a project for estimation Above ground Biomass in hilly terrain for mature forests and asscessing accuracy of different classifiers availiable in GEE such as Random forest and other python based models such as Extreme gradient Boosting(XGB) and catBoost methods. The problem im facing in GEE for all classifiers is that only sentinel 2 bands provide an accuracy of 0.4 or 40% addition of further indices and terrain products significantly reduce the accuracy instead of increasing whereas the added bands have more variable importance in RF variable importance graph. What could be the issue with the given code for RF regression. As in general more predictor bands leads to better output.Additional indices are commented in line 166 and code provide maximum accuracy in this case and when we uncomment these line to add other bands to train the classifier accuracy is reduced. Here is the link to GEE Code: https://code.earthengine.google.com/714f9f4d567f39d2f3d2623243bff289)

////Study area 
var pakistan = ee.FeatureCollection("users/asimqadeer926/Pakistan_Admin_Boundary");
var diamir = pakistan
          .filter(ee.Filter.eq('adm2_name', 'Diamir'));
                                
Map.addLayer(diamir, {color: 'red'}, "Diamir District", true, 1); 
Map.centerObject(diamir, 8);

///////////////////////  Terrain /////////////////

// Load the Digital Elevation Model (DEM)
var dem = ee.Image("NASA/NASADEM_HGT/001");
// Define the region of interest as Diamir district
var district = ee.FeatureCollection("projects/ee-asimqadeer926/assets/diamir");

// Resample the DEM to Sentinel 2 resolution
var sentinel2 = ee.ImageCollection("COPERNICUS/S2").filterBounds(district)
  .sort('CLOUD_COVERAGE_ASSESSMENT')
  .first().select('B3');
var s2_resolution = sentinel2.select('B3').projection().nominalScale().getInfo();
var dem_resampled = dem.resample('bicubic').reproject(sentinel2.projection(), null, s2_resolution).clip(district);
// Compute slope
var slope = ee.Terrain.slope(dem_resampled);
// Compute aspect
var aspect = ee.Terrain.aspect(dem_resampled);
// Compute hillshade
var hillshade = ee.Terrain.hillshade(dem_resampled);
var dem_clip = dem.clip(district)

/////////////////// Indices ///////////////////////////

// Load the Sentinel 2 dataset for year 2016
var sentinel2 = ee.ImageCollection('COPERNICUS/S2')
    .filterDate('2016-01-01', '2016-12-31')
    .filterBounds(diamir);

// Get the median image for the year
var medianImage = sentinel2.median().clip(diamir);

// Define the functions for each index calculation
var ndvi = function(image) {
  return image.expression('(B8 - B4) / (B8 + B4)', {
    'B8': image.select('B8'),
    'B4': image.select('B4')
  });
};

var ndvi_n = function(image) {
  return image.expression('(B8A - B4) / (B8A + B4)', {
    'B8A': image.select('B8A'),
    'B4': image.select('B4')
  });
};

var ndvi_re = function(image) {
  return image.expression('(B8 - B5) / (B8 + B5)', {
    'B8': image.select('B8'),
    'B5': image.select('B5')
  });
};

var ndre_n = function(image) {
  return image.expression('(B8A - B5) / (B8A + B5)', {
    'B8A': image.select('B8A'),
    'B5': image.select('B5')
  });
};

var rendvi = function(image) {
  return image.expression('(B6 - B5) / (B6 + B5)', {
    'B6': image.select('B6'),
    'B5': image.select('B5')
  });
};

var ndi45 = function(image) {
  return image.expression('(B5 - B4) / (B5 + B4)', {
    'B5': image.select('B5'),
    'B4': image.select('B4')
  });
};

var wdrvi_1 = function(image) {
  return image.expression('(0.1 * B8A - B4) / (0.1 * B8A + B4)', {
    'B8A': image.select('B8A'),
    'B4': image.select('B4')
  });
};

var wdrvi_2 = function(image) {
  return image.expression('(0.2 * B8A - B4) / (0.2 * B8A + B4)', {
    'B8A': image.select('B8A'),
    'B4': image.select('B4')
  });
};

var ndwi = function(image) {
  return image.expression('(B8A - B11) / (B8A + B11)', {
    'B8A': image.select('B8A'),
    'B11': image.select('B11')
  });
};

// Apply the functions to the median image
var ndviImage = ndvi(medianImage).rename('ndviImage');
var ndvi_nImage = ndvi_n(medianImage).rename('ndvi_nImage');
var ndvi_reImage = ndvi_re(medianImage).rename('ndvi_reImage');
var ndre_nImage = ndre_n(medianImage).rename('ndre_nImage');
var rendviImage = rendvi(medianImage).rename('rendviImage');
var ndi45Image = ndi45(medianImage).rename('ndi45Image');
var wdrvi_1Image = wdrvi_1(medianImage).rename('wdrvi_1Image');
var wdrvi_2Image = wdrvi_2(medianImage).rename('wdrvi_2Image');
var ndwiImage = ndwi(medianImage).rename('ndwiImage');

// Visualize the indices on a map
Map.addLayer(ndviImage, {min: -1, max: 1, palette: ['red', 'yellow', 'green']}, 'NDVI');
Map.addLayer(ndvi_nImage, {min: -1, max: 1, palette: ['red', 'yellow', 'green']}, 'NDVI_n');
Map.addLayer(ndvi_reImage, {min: -1, max: 1, palette: ['red', 'yellow', 'green']}, 'NDVI_re');
Map.addLayer(ndre_nImage, {min: -1, max: 1, palette: ['red', 'yellow', 'green']}, 'NDRE_n');
Map.addLayer(rendviImage, {min: -1, max: 1, palette: ['red', 'yellow', 'green']}, 'RENDVI');
Map.addLayer(ndi45Image, {min: -1, max: 1, palette: ['red', 'yellow', 'green']}, 'NDI45');
Map.addLayer(wdrvi_1Image, {min: -1, max: 1, palette: ['red', 'yellow', 'green']}, 'WDRVI_1');
Map.addLayer(wdrvi_2Image, {min: -1, max: 1, palette: ['red', 'yellow', 'green']}, 'WDRVI_2');
Map.addLayer(ndwiImage, {min: -1, max: 1, palette: ['blue', 'white', 'brown']}, 'NDWI');

/////////////////////   Remaining Code ////////////////////

////Training and Validadtion samples = AGB (ton/ha)                                  
var samples = ee.FeatureCollection("users/asimqadeer926/train_points_diamir");
print('Training Samples', samples)
var validationsamples = ee.FeatureCollection("users/asimqadeer926/validation_points_diamir");
print('Validation Samples', validationsamples)


////Forest Mask for mature tress = masking AGB
var lc = ee.Image('users/hammadgilani/GB_reclassified_geo'); 
var datamask = lc;      
var mask = datamask.eq(3).clip(diamir); 
Map.addLayer(mask, {}, 'Diamir - GB Forest');


////Sentinel-2 Cloud Masking and Median image 
var s2collection = ee.ImageCollection("COPERNICUS/S2").filterDate('2016-06-01', '2016-12-31');
var cloudyImage = ee.Image('COPERNICUS/S2/20170727T053639_20170727T053854_T43SFV');
print(cloudyImage,'cloudyimage')


// Bits 10 and 11 are clouds and cirrus, respectively.
var cloudBitMask = ee.Number(2).pow(10).int();
var cirrusBitMask = ee.Number(2).pow(11).int();
var qa = cloudyImage.select('QA60');

function maskS2clouds(image) {
  var qa = image.select('QA60');
  // Both flags should be set to zero, indicating clear conditions.
  var mask = qa.bitwiseAnd(cloudBitMask).eq(0).and(
            qa.bitwiseAnd(cirrusBitMask).eq(0));
  return image.updateMask(mask);
}

var cloudMasked = s2collection.filterBounds(diamir).map(maskS2clouds);
var b11 = cloudMasked.median().select('B11')
var b12 = cloudMasked.median().select('B12')

var median = cloudMasked.median().select('B[1-9]')
/*.addBands(dem_clip).addBands(slope).addBands(aspect)
.addBands(b11).addBands(b12) 
.addBands(ndviImage).addBands(ndvi_nImage).addBands(ndvi_reImage).addBands(ndre_nImage)
.addBands(rendviImage).addBands(ndi45Image).addBands(wdrvi_1Image).addBands(wdrvi_2Image)
.addBands(ndwiImage)*/
print(median,'median img')
Map.addLayer(median.clip(diamir), {bands: ['B8', 'B4', 'B2'], max: 2000}, 'Median Image');

////Extraction of Sentinel-2 bands for RandomForest modelling 
var trainingSample = median.sampleRegions(samples, ["AGB"], 10).randomColumn();
var bandNames = median.bandNames();

print(bandNames)
var classifier = ee.Classifier.smileRandomForest(200).train(trainingSample,"AGB", bandNames).setOutputMode('REGRESSION');

//var classifier1 = ee.Classifier.smileGradientTreeBoost(2000).setOutputMode("REGRESSION")

// var classifier1 = ee.Classifier.libsvm({
//     svmType: "EPSILON_SVR",
//     kernelType: "LINEAR",
//     shrinking: true,
//     //degree: 1,
//     cost: 1,
//     terminationEpsilon: 0.001,
//     lossEpsilon: 0.1})
//   .setOutputMode("REGRESSION")
  
// var classifier = classifier1.train(trainingSample,"AGB", bandNames);

////Variable Importance - Graph  
var dict = classifier.explain();
print('Explain:',dict);

var variable_importance = ee.Feature(null, ee.Dictionary(dict).get('importance'));
print(ui.Chart.feature.byProperty(variable_importance)
    .setChartType('ColumnChart')
    .setOptions({
      title: 'Random Forest Variable Importance',
      legend: {position: 'none'},
      hAxis: {title: 'Bands'},
      vAxis: {title: 'Importance'}
    }));
  

////Implementation of model on image
var classification = ee.Image(median.classify(classifier).clip(diamir));
//print(classification);

//Masking and displaying - AGB map on Forest cover 
var clipped = classification.updateMask(mask);
var viz = {min:0, max: 800, palette:'FFFFFF, CE7E45, DF923D, F1B555, FCD163, 99B718, 74A901, 66A000, 529400,' +
    '3E8601, 207401, 056201, 004C00, 023B01, 012E01, 011D01, 011301'};

Map.addLayer (classification, viz, 'AGB Map1');
Map.addLayer (clipped, viz, 'AGB Map');


//Validation of AGB map - 

var features = clipped.sampleRegions(validationsamples, ["AGB"], 10)
print(ui.Chart.feature.byFeature({
  features: features, 
  xProperty: 'AGB', 
  yProperties: ['classification']
}).setOptions({
  title: "Validation of AGB",
  hAxis: {title: 'Observed AGB'},
  vAxis: {title: 'Predicted AGB'},
  colors: ['#EF851C'],
  pointSize: 3,
  lineWidth: 0,
  trendlines: { 
    0: { 
      type: 'linear', 
      visibleInLegend: false,
      showR2: true,
      color: 'black',
      opacity: 1,
      lineWidth: 2,
      pointSize: 0
    } 
  }  
}));

// Calclate r2
 var r2 = ee.Number(features.reduceColumns(ee.Reducer.pearsonsCorrelation(), ['AGB', 'classification']).get('correlation')).pow(2);
 print('r^2 = ', r2);

// Calclate RMSE
var val_extracted = features.reduceColumns(ee.Reducer.toList(2),['AGB','classification']).get('list')
var obs = ee.Array(val_extracted).transpose().cut([0,-1]).project([1])
var pred = ee.Array(val_extracted).transpose().cut([1,-1]).project([1])
var rmse = obs.subtract(pred).pow(2).reduce('mean', [0]).sqrt()
print('RMSE = ', rmse)

////Display training and validation samples 
Map.addLayer(samples, {color: 'green'}, "AGB  Training Samples"); 
Map.addLayer(validationsamples, {color: 'yellow'}, "AGB  Validation Samples");

he problem is only bands from 1-9 give highest accuracy of 40 where as they have lower variable importance when I filter and select high importance variables , the accuracy reduces which is the problem . the concerned code is below:

var median = cloudMasked.median().select('B[1-9]')
.addBands(dem_clip).addBands(slope).addBands(aspect)
.addBands(b11).addBands(b12) 
.addBands(ndviImage).addBands(ndvi_nImage).addBands(ndvi_reImage).addBands(ndre_nImage)
.addBands(rendviImage).addBands(ndi45Image).addBands(wdrvi_1Image).addBands(wdrvi_2Image)
.addBands(ndwiImage)

The variable importance band is also attached below. RF Variable Importance

1 Answers1

0

I think you might be getting ahead of yourself trying to use the importance at this stage of your analysis.

An accuracy of 40% represents a model with basically no skill (you could flip a coin and use that to classify points and you'd get a better accuracy...). So trying to figure out which parameter is most important for your model that performs badly doesn't really make sense.

I would suggest re-evaluating your approach, and first focus on training a model that is good. After that, you can investigate the parameter importance to understand which contribute most to your good model. Though, even then, I would caution that the parameter importance isn't a perfect metric, and isn't something to be just used without critical analysis.

Téo
  • 191
  • 3