Select to view content in your preferred language

Calculate Correlation and P-value between rasters

3307
11
Jump to solution
07-02-2019 10:15 PM
margaritastevia
Emerging Contributor

Hi Xander Bakker‌ I have used this script and got two rasters  "correlation" and "pearson". Which one is the P-value? 

Branched from https://community.esri.com/thread/200534-re-correlation-between-two-different-rasters 

0 Kudos
1 Solution

Accepted Solutions
XanderBakker
Esri Esteemed Contributor

Hi margarita stevia , 

Could you validate if this works for you? I will create the additional p-value raster and a mask raster containing the pixels with value 1 where the condition (p-value <= 0.05 and positive correlation) is met. The calculation of the p-value is based on the code provided by Dan Patterson 

It seems that the resulting areas (blue pixels) are near the rivers . Does that make sense?

See code below:

def main():
    print "load modules..."
    import arcpy
    arcpy.env.overwriteOutput = True

    import numpy as np
    template1 = r'C:\GeoNet\Pvalue\Send\NDVI\r{0}_WUE.TIF'
    template2 = r'C:\GeoNet\Pvalue\Send\temp\r{0}_TEM.TIF'
    nodata = -3.4028235e+38
    out_ras_corr = r'C:\GeoNet\Pvalue\Send\Results\correlation_v01.TIF'
    out_ras_pear = r'C:\GeoNet\Pvalue\Send\Results\pearson_v01.TIF'
    out_ras_pval = r'C:\GeoNet\Pvalue\Send\Results\pvalue_v01.TIF'
    out_ras_mask = r'C:\GeoNet\Pvalue\Send\Results\mask_v01.TIF'

    # settings
    max_pval = 0.05
    min_corr = 0
    mask_val = 1
    no_mask_val = 0

    print "create nested numpy array list..."
    lst_np_ras = []
    for i in range(1, 5):
        ras_path1 = template1.format("%03d" % (i,))
        print " - ", ras_path1
        ras_np1 = arcpy.RasterToNumPyArray(ras_path1)
        ras_path2 = template2.format("%03d" % (i,))
        print " - ", ras_path2
        ras_np2 = arcpy.RasterToNumPyArray(ras_path2)
        lst_np_ras.append([ras_np1, ras_np2])

    print "read props numpy raster..."
    ras_np = lst_np_ras[0][0]  # take first numpy array from list
    rows = ras_np.shape[0]
    cols = ras_np.shape[1]
    print " - rows:", rows
    print " - cols:", cols

    print "create output numpy array..."
    ras_path = template1.format("%03d" % (1,))
    raster = arcpy.Raster(ras_path)
    ras_np_res_corr = np.ndarray((rows, cols))
    ras_np_res_pear = np.ndarray((rows, cols))
    ras_np_res_pval = np.ndarray((rows, cols))
    ras_np_res_mask = np.ndarray((rows, cols))
    print " - out rows:", ras_np_res_corr.shape[0]
    print " - out cols:", ras_np_res_corr.shape[1]

    print "loop through pixels..."
    pix_cnt = 0
    for row in range(rows):
        for col in range(cols):
            pix_cnt += 1
            if pix_cnt % 5000 == 0:
                print " - row:", row, "  col:", col, "  pixel:", pix_cnt
            lst_vals1 = []
            lst_vals2 = []
            try:
                for lst_pars in lst_np_ras:
                    lst_vals1.append(lst_pars[0][row, col])
                    lst_vals2.append(lst_pars[1][row, col])
                lst_vals1 = ReplaceNoData(lst_vals1, nodata)
                lst_vals2 = ReplaceNoData(lst_vals2, nodata)
                # perform calculation on list
                correlation = CalculateCorrelation(lst_vals1, lst_vals2, nodata)
                ras_np_res_corr[row, col] = correlation
                pearson = CalculatePearsons(lst_vals1, lst_vals2, nodata)
                ras_np_res_pear[row, col] = pearson
                pvalue = CalculatePvalue(lst_vals1, lst_vals2, nodata)
                ras_np_res_pval[row, col] = pvalue
                # compare correlation and p-value
                if pvalue == nodata:
                    mask = nodata
                elif (correlation >= min_corr) and (pvalue <= max_pval):
                    mask = mask_val
                else:
                    mask = no_mask_val
                ras_np_res_mask[row, col] = mask

            except Exception as e:
                print "ERR:", e
                print " - row:", row, "  col:", col, "  pixel:", pix_cnt
                print " - lst_vals1:", lst_vals1
                print " - lst_vals2:", lst_vals2

    pnt = arcpy.Point(raster.extent.XMin, raster.extent.YMin) #  - raster.meanCellHeight
    xcellsize = raster.meanCellWidth
    ycellsize = raster.meanCellHeight

    print "Write output rasters..."
    print " - ", out_ras_corr
    ras_res_corr = arcpy.NumPyArrayToRaster(ras_np_res_corr, lower_left_corner=pnt, x_cell_size=xcellsize,
                                 y_cell_size=ycellsize, value_to_nodata=nodata)
    arcpy.CopyRaster_management(ras_res_corr, out_ras_corr)
    arcpy.DefineProjection_management(in_dataset=out_ras_corr, coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]")

    print " - ", out_ras_pear
    ras_res_pear = arcpy.NumPyArrayToRaster(ras_np_res_pear, lower_left_corner=pnt, x_cell_size=xcellsize,
                                 y_cell_size=ycellsize, value_to_nodata=nodata)
    arcpy.CopyRaster_management(ras_res_pear, out_ras_pear)
    arcpy.DefineProjection_management(in_dataset=out_ras_pear, coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]")

    print " - ", out_ras_pval
    ras_res_pval = arcpy.NumPyArrayToRaster(ras_np_res_pval, lower_left_corner=pnt, x_cell_size=xcellsize,
                                 y_cell_size=ycellsize, value_to_nodata=nodata)
    arcpy.CopyRaster_management(ras_res_pval, out_ras_pval)
    arcpy.DefineProjection_management(in_dataset=out_ras_pval, coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]")

    print " - ", out_ras_mask
    ras_res_mask = arcpy.NumPyArrayToRaster(ras_np_res_mask, lower_left_corner=pnt, x_cell_size=xcellsize,
                                 y_cell_size=ycellsize, value_to_nodata=nodata)
    arcpy.CopyRaster_management(ras_res_mask, out_ras_mask)
    arcpy.DefineProjection_management(in_dataset=out_ras_mask, coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]")


def CalculateCorrelation(a, b, nodata):
    import numpy
    try:
        coef = numpy.corrcoef(a,b)
        return coef[0][1]
    except:
        return nodata


def ReplaceNoData(lst, nodata):
    res = []
    for a in lst:
        if a < nodata / 2.0:
            res.append(None)
        else:
            res.append(a)
    return res


def CalculatePearsons(a, b, nodata):
    import numpy as np
    from scipy.stats import pearsonr
    try:
        x = np.array(a)
        y = np.array(b)
        r, p = pearsonr(a, y)
        return p
    except:
        return nodata

def CalculatePvalue(a, b, nodata):
    import numpy as np
    from scipy.stats import stats
    try:
        x = np.array(a)
        y = np.array(b)
        r, p_val = stats.pearsonr(x, y)
        return p_val
    except:
        return nodata



if __name__ == '__main__':
    main()

View solution in original post

11 Replies
DanPatterson_Retired
MVP Emeritus

Neither, p-value is a measure of the strength of the correlation coefficient calculated from sample size and the measured 'r' and a sampling distribution

scipy.stats.pearsonr — SciPy v1.3.0 Reference Guide 

example for variants of simple arrays, using this

from scipy import stats

a = np.arange(9)
b = np.arange(9, 0, -1)
c = np.array([9, 7, 8, 6, 5, 4, 3, 2, 1]) # ---- like b, but 2 numbers switched

r, p_val = stats.pearsonr(a, b)  # ---- perfect inverse, r = -1, p = 0.0000
r, p_val
(-1.0, 0.0)

r, p_val = stats.pearsonr(a, c)
r, p_val
(-0.9833333333333333, 1.936196303745927e-06)
margaritastevia
Emerging Contributor

I want to extract the pixels where correlation is positive and p-value is < 0.05 How i can do that? How i can estimate P as a raster?

0 Kudos
margaritastevia
Emerging Contributor

Hi Xander Bakker‌ please help me with how i can extract correlation values at 0.05 significance level. I'm running out of time.   

0 Kudos
XanderBakker
Esri Esteemed Contributor

Hi margaritastevia 

Can you share a sample of your data and the script you have so far?

0 Kudos
margaritastevia
Emerging Contributor

Hi Xander Bakker,

Thank you very much for your kind response. please find the attachment.

0 Kudos
XanderBakker
Esri Esteemed Contributor

Hi margarita stevia , 

Could you validate if this works for you? I will create the additional p-value raster and a mask raster containing the pixels with value 1 where the condition (p-value <= 0.05 and positive correlation) is met. The calculation of the p-value is based on the code provided by Dan Patterson 

It seems that the resulting areas (blue pixels) are near the rivers . Does that make sense?

See code below:

def main():
    print "load modules..."
    import arcpy
    arcpy.env.overwriteOutput = True

    import numpy as np
    template1 = r'C:\GeoNet\Pvalue\Send\NDVI\r{0}_WUE.TIF'
    template2 = r'C:\GeoNet\Pvalue\Send\temp\r{0}_TEM.TIF'
    nodata = -3.4028235e+38
    out_ras_corr = r'C:\GeoNet\Pvalue\Send\Results\correlation_v01.TIF'
    out_ras_pear = r'C:\GeoNet\Pvalue\Send\Results\pearson_v01.TIF'
    out_ras_pval = r'C:\GeoNet\Pvalue\Send\Results\pvalue_v01.TIF'
    out_ras_mask = r'C:\GeoNet\Pvalue\Send\Results\mask_v01.TIF'

    # settings
    max_pval = 0.05
    min_corr = 0
    mask_val = 1
    no_mask_val = 0

    print "create nested numpy array list..."
    lst_np_ras = []
    for i in range(1, 5):
        ras_path1 = template1.format("%03d" % (i,))
        print " - ", ras_path1
        ras_np1 = arcpy.RasterToNumPyArray(ras_path1)
        ras_path2 = template2.format("%03d" % (i,))
        print " - ", ras_path2
        ras_np2 = arcpy.RasterToNumPyArray(ras_path2)
        lst_np_ras.append([ras_np1, ras_np2])

    print "read props numpy raster..."
    ras_np = lst_np_ras[0][0]  # take first numpy array from list
    rows = ras_np.shape[0]
    cols = ras_np.shape[1]
    print " - rows:", rows
    print " - cols:", cols

    print "create output numpy array..."
    ras_path = template1.format("%03d" % (1,))
    raster = arcpy.Raster(ras_path)
    ras_np_res_corr = np.ndarray((rows, cols))
    ras_np_res_pear = np.ndarray((rows, cols))
    ras_np_res_pval = np.ndarray((rows, cols))
    ras_np_res_mask = np.ndarray((rows, cols))
    print " - out rows:", ras_np_res_corr.shape[0]
    print " - out cols:", ras_np_res_corr.shape[1]

    print "loop through pixels..."
    pix_cnt = 0
    for row in range(rows):
        for col in range(cols):
            pix_cnt += 1
            if pix_cnt % 5000 == 0:
                print " - row:", row, "  col:", col, "  pixel:", pix_cnt
            lst_vals1 = []
            lst_vals2 = []
            try:
                for lst_pars in lst_np_ras:
                    lst_vals1.append(lst_pars[0][row, col])
                    lst_vals2.append(lst_pars[1][row, col])
                lst_vals1 = ReplaceNoData(lst_vals1, nodata)
                lst_vals2 = ReplaceNoData(lst_vals2, nodata)
                # perform calculation on list
                correlation = CalculateCorrelation(lst_vals1, lst_vals2, nodata)
                ras_np_res_corr[row, col] = correlation
                pearson = CalculatePearsons(lst_vals1, lst_vals2, nodata)
                ras_np_res_pear[row, col] = pearson
                pvalue = CalculatePvalue(lst_vals1, lst_vals2, nodata)
                ras_np_res_pval[row, col] = pvalue
                # compare correlation and p-value
                if pvalue == nodata:
                    mask = nodata
                elif (correlation >= min_corr) and (pvalue <= max_pval):
                    mask = mask_val
                else:
                    mask = no_mask_val
                ras_np_res_mask[row, col] = mask

            except Exception as e:
                print "ERR:", e
                print " - row:", row, "  col:", col, "  pixel:", pix_cnt
                print " - lst_vals1:", lst_vals1
                print " - lst_vals2:", lst_vals2

    pnt = arcpy.Point(raster.extent.XMin, raster.extent.YMin) #  - raster.meanCellHeight
    xcellsize = raster.meanCellWidth
    ycellsize = raster.meanCellHeight

    print "Write output rasters..."
    print " - ", out_ras_corr
    ras_res_corr = arcpy.NumPyArrayToRaster(ras_np_res_corr, lower_left_corner=pnt, x_cell_size=xcellsize,
                                 y_cell_size=ycellsize, value_to_nodata=nodata)
    arcpy.CopyRaster_management(ras_res_corr, out_ras_corr)
    arcpy.DefineProjection_management(in_dataset=out_ras_corr, coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]")

    print " - ", out_ras_pear
    ras_res_pear = arcpy.NumPyArrayToRaster(ras_np_res_pear, lower_left_corner=pnt, x_cell_size=xcellsize,
                                 y_cell_size=ycellsize, value_to_nodata=nodata)
    arcpy.CopyRaster_management(ras_res_pear, out_ras_pear)
    arcpy.DefineProjection_management(in_dataset=out_ras_pear, coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]")

    print " - ", out_ras_pval
    ras_res_pval = arcpy.NumPyArrayToRaster(ras_np_res_pval, lower_left_corner=pnt, x_cell_size=xcellsize,
                                 y_cell_size=ycellsize, value_to_nodata=nodata)
    arcpy.CopyRaster_management(ras_res_pval, out_ras_pval)
    arcpy.DefineProjection_management(in_dataset=out_ras_pval, coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]")

    print " - ", out_ras_mask
    ras_res_mask = arcpy.NumPyArrayToRaster(ras_np_res_mask, lower_left_corner=pnt, x_cell_size=xcellsize,
                                 y_cell_size=ycellsize, value_to_nodata=nodata)
    arcpy.CopyRaster_management(ras_res_mask, out_ras_mask)
    arcpy.DefineProjection_management(in_dataset=out_ras_mask, coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]")


def CalculateCorrelation(a, b, nodata):
    import numpy
    try:
        coef = numpy.corrcoef(a,b)
        return coef[0][1]
    except:
        return nodata


def ReplaceNoData(lst, nodata):
    res = []
    for a in lst:
        if a < nodata / 2.0:
            res.append(None)
        else:
            res.append(a)
    return res


def CalculatePearsons(a, b, nodata):
    import numpy as np
    from scipy.stats import pearsonr
    try:
        x = np.array(a)
        y = np.array(b)
        r, p = pearsonr(a, y)
        return p
    except:
        return nodata

def CalculatePvalue(a, b, nodata):
    import numpy as np
    from scipy.stats import stats
    try:
        x = np.array(a)
        y = np.array(b)
        r, p_val = stats.pearsonr(x, y)
        return p_val
    except:
        return nodata



if __name__ == '__main__':
    main()
margaritastevia
Emerging Contributor

print("Thank you")

Thank you Sir Xander Bakker I'm vary thankful to you for your kind response. The script is running perfectly. Yes the pixels are near the river.  

XanderBakker
Esri Esteemed Contributor

Hi margaritastevia ,

Glad to hear that the script is producing the expected results. Good luck with you investigation!

0 Kudos
YingyingZhang
New Contributor

Hi Xander Bakker, 

    Glad to get the script of the correlation and P-value between rasters, but now I only have two rasters(ndvi and occurence of flooding). I can't  solve my question using the script above. I need calculate the correlation and p-value between two rasters not sets of rasters.Xander BakkerCalculate Correlation and P-value between rasters#My data is attached, need your help.

Best wishes

0 Kudos