Land cover classification using a Naives Bayes algorithm with Python - Tutorial

Machine learning can be applied to many fields as land cover classification from remote sensing imagery. The performance and accuracy of classification will depend on the number of raster bands, the image resolution, the land cover type and the algorithm used. This tutorial will perform an applied case of land cover classification from a panchromatic image in Python using the Naives Bayes algorithm implemented on the Scikit Learn package. The classification will cover four categories as: rivers, river beaches, woods and pastures; coding is performed under a Jupyter Notebook with Python running from a geospatial Conda environment. Some graphics and statistics about the classification precision are also included on the tutorial.

IMPORTANT: You will need a conda environment with geospatial tools for the tutorial. Create the environment following this link: hatarilabs.com/ih-en/how-to-install-python-geopandas-in-windows-on-a-conda-environment-tutorial

Tutorial

Code

Part 1 Data processing

import rasterio
from rasterio.plot import show
classRst =  rasterio.open('../Data/Class/LandCoverClassified_v2.tif')
show(classRst)
classBand = classRst.read(1)
classBand[:5,:5]
array([[40., 40., 40., 40., 40.],
       [40., 40., 40., 40., 40.],
       [40., 40., 40., 40., 40.],
       [40., 40., 40., 40., 40.],
       [40., 40., 40., 40., 40.]], dtype=float32)
nrow, ncol = classBand.shape
print(nrow,ncol)
42 56
# get a sample pixel coordinates 
for row in range(nrow)[:3]:
    for col in range(ncol)[:3]:
        print(classRst.xy(row,col))
(-73.163875, -3.916002123)
(-73.16362500000001, -3.916002123)
(-73.163375, -3.916002123)
(-73.163875, -3.916252123)
(-73.16362500000001, -3.916252123)
(-73.163375, -3.916252123)
(-73.163875, -3.9165021230000003)
(-73.16362500000001, -3.9165021230000003)
(-73.163375, -3.9165021230000003)
# get pixel value for a give coordinate
imageRst =  rasterio.open('../Data/Rst/riverImage2.tif')
imageBand = imageRst.read(1)
row, col = imageRst.index(-73.16275, -3.91712)
bandValue = imageBand[row,col]
print(bandValue)
15
# open list of image bands
redBand = imageRst.read(1)
greenBand = imageRst.read(2)
blueBand = imageRst.read(3)
bandList = [redBand, greenBand, blueBand]
#define a function that returns a band value
def rasterValue(imageRst,imageBand,x,y):
    row, col = imageRst.index(x,y)
    bandValue = imageBand[row,col]
    return(bandValue)
rasterValue(imageRst,blueBand,-73.16275, -3.91712)
45
#create complex function for all rows, cols and bands
import sys
classList = []

# get a sample pixel coordinates 
for row in range(nrow):
    for col in range(ncol):
        sys.stdout.write('\r'+'Working with row, col: %d %d'%(row,col))
        partialLine = []
        x,y = classRst.xy(row,col)
        for band in bandList:
            value = rasterValue(imageRst,band,x,y)
            partialLine.append(value)
        partialLine.append(classBand[row,col])
        classList.append(partialLine)
Working with row, col: 41 55
import numpy as np
classArray = np.array(classList)
classArray[:5]
array([[15., 56., 40., 40.],
       [10., 51., 35., 40.],
       [13., 63., 42., 40.],
       [13., 65., 42., 40.],
       [18., 70., 47., 40.]], dtype=float32)
np.save('../Data/Txt/classArray',classArray)

Part 2 Naives Bayes

import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
classArray = np.load('../Data/Txt/classArray.npy')
classArray[:5]
array([[15., 56., 40., 40.],
       [10., 51., 35., 40.],
       [13., 63., 42., 40.],
       [13., 65., 42., 40.],
       [18., 70., 47., 40.]], dtype=float32)
X = classArray[:,:-1]
X[:2]
array([[15., 56., 40.],
       [10., 51., 35.]], dtype=float32)
y = classArray[:,-1]
y[:2]
array([40., 40.], dtype=float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
clf = GaussianNB()
clf.fit(X_train, y_train)
GaussianNB()
y_predict = clf.predict(X_test)
import matplotlib.pyplot as plt
fig= plt.figure(figsize=(12,6))
plt.scatter(y_test,y_predict,alpha=0.01,s=100)
<matplotlib.collections.PathCollection at 0x2b37a32db20>
from sklearn.metrics import confusion_matrix
confMatrix = confusion_matrix(y_test,y_predict)
confMatrix
array([[238,   0,  12,   1],
       [ 10, 111,   3,   0],
       [  2,   6,  45,   8],
       [  0,   0,  39, 302]], dtype=int64)
#!pip install seaborn
import seaborn as sns; sns.set_theme()
fig= plt.figure(figsize=(12,6))
ax = sns.heatmap(confMatrix)
# generation of output raster
import rasterio 

res = 0.0005
x0,y0 = -73.2870, -3.9635
xLL, yLL = x0 - res/2, y0 - res/2
nrow, ncol = 60, 100
#definition of the raster transform array
from rasterio.transform import Affine
transform = Affine.translation(xLL, yLL)*Affine.scale(res,res)
transform
Affine(0.0005, 0.0, -73.28725,
       0.0, 0.0005, -3.9637499999999997)
#get crs as wkt
from rasterio.crs import CRS
rasterCrs = CRS.from_epsg(4326)
rasterCrs.data
{'init': 'epsg:4326'}
imageRst =  rasterio.open('../Data/Rst/riverImage2.tif')

# open list of image bands
redBand = imageRst.read(1)
greenBand = imageRst.read(2)
blueBand = imageRst.read(3)
bandList = [redBand, greenBand, blueBand]
import rasterio

# empty array
classArray = np.zeros([nrow,ncol])

### From Part 1



#define a function that returns a band value
def rasterValue(imageRst,imageBand,x,y):
    row, col = imageRst.index(x,y)
    bandValue = imageBand[row,col]
    return(bandValue)

#create complex function for all rows, cols and bands
import sys
classList = []

# get a sample pixel coordinates 
for row in range(nrow):
    for col in range(ncol):
        sys.stdout.write('\r'+'Working with row, col: %d %d'%(row,col))
        partialLine = []
        #x,y = classRst.xy(row,col)
        x = x0 + col*res #<--new
        y = y0 + row*res #<--new
        for band in bandList:
            value = rasterValue(imageRst,band,x,y)
            partialLine.append(value)
        y_predict = clf.predict([np.array(partialLine)]) #<--new
        classArray[row,col] = y_predict #<--new
        #partialLine.append(classBand[row,col])
        #classList.append(partialLine)
Working with row, col: 59 99
#show class array
classArray
array([[40., 40., 40., ..., 40., 40., 40.],
       [40., 40., 40., ..., 40., 40., 40.],
       [40., 40., 40., ..., 40., 40., 40.],
       ...,
       [10., 30., 40., ..., 40., 40., 40.],
       [30., 10., 30., ..., 40., 40., 40.],
       [40., 40., 30., ..., 40., 40., 40.]])
#plot array
plt.imshow(classArray)
<matplotlib.image.AxesImage at 0x2b37df03400>
#definition, register and close of interpolated raster
interpRaster = rasterio.open('../Data/Class/predictedLandCover.tif',
                                'w',
                                driver='GTiff',
                                height=nrow,
                                width=ncol,
                                count=1,
                                dtype=np.float32,
                                crs=rasterCrs,
                                transform=transform,
                                )
interpRaster.write(classArray,1)
interpRaster.close()

Input data

You can download the input data from this link.

Comment

Saul Montoya

Saul Montoya es Ingeniero Civil graduado de la Pontificia Universidad Católica del Perú en Lima con estudios de postgrado en Manejo e Ingeniería de Recursos Hídricos (Programa WAREM) de la Universidad de Stuttgart con mención en Ingeniería de Aguas Subterráneas y Hidroinformática.

 

Suscribe to our online newsletter

Subscribe for free newsletter, receive news, interesting facts and dates of our courses in water resources.