{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import rioxarray\n", "import os\n", "import numpy as np\n", "\n", "# <<< INVASIVE SPECIES MAP >>>\n", "INVASIVE_BIRDS_PATH = 'Datasets/Machine Learning/10km Rasters/Birds/All_Invasive_Birds_10km.tif'\n", "\n", "# <<< LAND COVER MAP >>>\n", "# Dimensions: 700000x1300000\n", "LAND_COVER_MAP_PATH = 'Datasets/Machine Learning/10km Rasters/Features/gb2021lcm10km_percentage_target.tif'\n", "\n", "# <<< FERTILISER >>>\n", "# The dataset consists of maps of the predicted average annual application rates (2010-2015) of three different inorganic \n", "# chemical fertilisers – nitrogen (N), phosphorus (P) and potassium (K) – in England across a six-year period, along with \n", "# their respective estimates of uncertainty, at a 1 km x 1 km resolution. \n", "FERTILISER_K_PATH = 'Datasets/Machine Learning/10km Rasters/Features/fertiliser_k_prediction_uncertainty_10km.tif'\n", "FERTILISER_N_PATH = 'Datasets/Machine Learning/10km Rasters/Features/fertiliser_n_prediction_uncertainty_10km.tif'\n", "FERTILISER_P_PATH = 'Datasets/Machine Learning/10km Rasters/Features/fertiliser_p_prediction_uncertainty_10km.tif'\n", "\n", "# <<< PESTICIDE >>>\n", "PESTICIDE_FOLDER_PATH = 'Datasets/Machine Learning/10km Rasters/Features/Pesticides/'\n", "\n", "# <<< INTEGRATED HYDROLOGICAL DIGITAL TERRAIN MODEL >>>\n", "# Dimensions: 700000x1300000\n", "# These datasets all only have one band\n", "ELEVATION_PATH = 'Datasets/Machine Learning/10km Rasters/Features/HGHT_10km.tif'\n", "CUMULATIVE_CATCHMENT_AREA_PATH = 'Datasets/Machine Learning/10km Rasters/Features/CCAR_10km.tif'\n", "SURFACE_TYPE_PATH = 'Datasets/Machine Learning/10km Rasters/Features/SURF_10km.tif'\n", "OUTFLOWING_DRAINAGE_DIRECTION_PATH = 'Datasets/Machine Learning/10km Rasters/Features/OUTF_10km.tif'\n", "INFLOWING_DRAINAGE_PATTERN_PATH = 'Datasets/Machine Learning/10km Rasters/Features/INFL_10km.tif'\n", "\n", "IHDTM = {\n", " 
'Elevation': ELEVATION_PATH, \n", " 'Cumulative catchment area': CUMULATIVE_CATCHMENT_AREA_PATH, \n", " 'Surface type': SURFACE_TYPE_PATH, \n", " 'Outflowing drainage direction': OUTFLOWING_DRAINAGE_DIRECTION_PATH, \n", " 'Inflowing drainage direction': INFLOWING_DRAINAGE_PATTERN_PATH\n", "}\n", "\n", "\n", "# <<< Opening in rasterio >>>\n", "# dataset = rasterio.open(FERTILISER_P_PATH)\n", "# data = dataset.read()\n", "# np.max(data)\n", "\n", "# <<< Opening in rioxarray >>>\n", "# dataset = rioxarray.open_rasterio(ELEVATION_PATH)\n", "# dataset.name = 'data'\n", "# df = dataset.to_dataframe()\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "data\n", "0 182086\n", "1 1212\n", "2 694\n", "3 495\n", "4 386\n", " ... \n", "80 25\n", "93 24\n", "85 23\n", "79 23\n", "78 23\n", "Length: 101, dtype: int64\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bandDeciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Inland rockSaltwaterFreshwaterSupralittoral rockSupralittoral sedimentLittoral rockLittoral sedimentSaltmarshUrbanSuburban
yx
1295000.05000.00000000000...0000000000
15000.00000000000...0000000000
25000.00000000000...0000000000
35000.00000000000...0000000000
45000.00000000000...0000000000
.....................................................................
5000.0655000.00000000000...0000000000
665000.00000000000...0000000000
675000.00000000000...0000000000
685000.00000000000...0000000000
695000.00000000000...0000000000
\n", "

9100 rows × 21 columns

\n", "
" ], "text/plain": [ "band Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1295000.0 5000.0 0 0 0 \n", " 15000.0 0 0 0 \n", " 25000.0 0 0 0 \n", " 35000.0 0 0 0 \n", " 45000.0 0 0 0 \n", "... ... ... ... \n", "5000.0 655000.0 0 0 0 \n", " 665000.0 0 0 0 \n", " 675000.0 0 0 0 \n", " 685000.0 0 0 0 \n", " 695000.0 0 0 0 \n", "\n", "band Improve grassland Neutral grassland \\\n", "y x \n", "1295000.0 5000.0 0 0 \n", " 15000.0 0 0 \n", " 25000.0 0 0 \n", " 35000.0 0 0 \n", " 45000.0 0 0 \n", "... ... ... \n", "5000.0 655000.0 0 0 \n", " 665000.0 0 0 \n", " 675000.0 0 0 \n", " 685000.0 0 0 \n", " 695000.0 0 0 \n", "\n", "band Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1295000.0 5000.0 0 0 0 0 \n", " 15000.0 0 0 0 0 \n", " 25000.0 0 0 0 0 \n", " 35000.0 0 0 0 0 \n", " 45000.0 0 0 0 0 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 0 0 0 \n", " 665000.0 0 0 0 0 \n", " 675000.0 0 0 0 0 \n", " 685000.0 0 0 0 0 \n", " 695000.0 0 0 0 0 \n", "\n", "band Heather grassland ... Inland rock Saltwater \\\n", "y x ... \n", "1295000.0 5000.0 0 ... 0 0 \n", " 15000.0 0 ... 0 0 \n", " 25000.0 0 ... 0 0 \n", " 35000.0 0 ... 0 0 \n", " 45000.0 0 ... 0 0 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 ... 0 0 \n", " 665000.0 0 ... 0 0 \n", " 675000.0 0 ... 0 0 \n", " 685000.0 0 ... 0 0 \n", " 695000.0 0 ... 0 0 \n", "\n", "band Freshwater Supralittoral rock Supralittoral sediment \\\n", "y x \n", "1295000.0 5000.0 0 0 0 \n", " 15000.0 0 0 0 \n", " 25000.0 0 0 0 \n", " 35000.0 0 0 0 \n", " 45000.0 0 0 0 \n", "... ... ... ... \n", "5000.0 655000.0 0 0 0 \n", " 665000.0 0 0 0 \n", " 675000.0 0 0 0 \n", " 685000.0 0 0 0 \n", " 695000.0 0 0 0 \n", "\n", "band Littoral rock Littoral sediment Saltmarsh Urban \\\n", "y x \n", "1295000.0 5000.0 0 0 0 0 \n", " 15000.0 0 0 0 0 \n", " 25000.0 0 0 0 0 \n", " 35000.0 0 0 0 0 \n", " 45000.0 0 0 0 0 \n", "... ... ... ... ... 
# Names given to the land cover map bands, listed in band order: band 1 is
# renamed to the first entry, band 2 to the second, and so on.
# NOTE(review): 'Improve grassland' is probably meant to read
# 'Improved grassland'; it is left untouched because it becomes a column name
# that other cells may refer to.
LCM_CLASSES = [
    'Deciduous woodland',
    'Coniferous woodland',
    'Arable',
    'Improve grassland',
    'Neutral grassland',
    'Calcareous grassland',
    'Acid grassland',
    'Fen',
    'Heather',
    'Heather grassland',
    'Bog',
    'Inland rock',
    'Saltwater',
    'Freshwater',
    'Supralittoral rock',
    'Supralittoral sediment',
    'Littoral rock',
    'Littoral sediment',
    'Saltmarsh',
    'Urban',
    'Suburban',
]

# The land cover map is the foundation of the main dataframe: per the original
# author it has the widest extent of all the input files and is arguably one
# of the most important feature sets. Each of its bands becomes one column.
lcm = rioxarray.open_rasterio(LAND_COVER_MAP_PATH)
lcm.name = 'data'

# Long format first: one row per (band, y, x) with a single 'data' column.
main_df = lcm.to_dataframe().drop(columns='spatial_ref')

# Quick look at how often each cell value occurs across all bands.
print(main_df.value_counts())

# Pivot the band level into columns, discard the now-redundant 'data' column
# level, and replace the band numbers with their class names.
main_df = (
    main_df
    .unstack(level='band')['data']
    .rename(columns=dict(enumerate(LCM_CLASSES, start=1)))
)
main_df
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Littoral rockLittoral sedimentSaltmarshUrbanSuburbanElevationCumulative catchment areaSurface typeOutflowing drainage directionInflowing drainage direction
yx
1295000.05000.00000000000...00000-9999-9999-1-1255
15000.00000000000...00000-9999-9999-1-1255
25000.00000000000...00000-9999-9999-1-1255
35000.00000000000...00000-9999-9999-1-1255
45000.00000000000...00000-9999-9999-1-1255
.....................................................................
5000.0655000.00000000000...00000-9999-9999-1-1255
665000.00000000000...00000-9999-9999-1-1255
675000.00000000000...00000-9999-9999-1-1255
685000.00000000000...00000-9999-9999-1-1255
695000.00000000000...00000-9999-9999-1-1255
\n", "

9100 rows × 26 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1295000.0 5000.0 0 0 0 \n", " 15000.0 0 0 0 \n", " 25000.0 0 0 0 \n", " 35000.0 0 0 0 \n", " 45000.0 0 0 0 \n", "... ... ... ... \n", "5000.0 655000.0 0 0 0 \n", " 665000.0 0 0 0 \n", " 675000.0 0 0 0 \n", " 685000.0 0 0 0 \n", " 695000.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1295000.0 5000.0 0 0 \n", " 15000.0 0 0 \n", " 25000.0 0 0 \n", " 35000.0 0 0 \n", " 45000.0 0 0 \n", "... ... ... \n", "5000.0 655000.0 0 0 \n", " 665000.0 0 0 \n", " 675000.0 0 0 \n", " 685000.0 0 0 \n", " 695000.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1295000.0 5000.0 0 0 0 0 \n", " 15000.0 0 0 0 0 \n", " 25000.0 0 0 0 0 \n", " 35000.0 0 0 0 0 \n", " 45000.0 0 0 0 0 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 0 0 0 \n", " 665000.0 0 0 0 0 \n", " 675000.0 0 0 0 0 \n", " 685000.0 0 0 0 0 \n", " 695000.0 0 0 0 0 \n", "\n", " Heather grassland ... Littoral rock Littoral sediment \\\n", "y x ... \n", "1295000.0 5000.0 0 ... 0 0 \n", " 15000.0 0 ... 0 0 \n", " 25000.0 0 ... 0 0 \n", " 35000.0 0 ... 0 0 \n", " 45000.0 0 ... 0 0 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 ... 0 0 \n", " 665000.0 0 ... 0 0 \n", " 675000.0 0 ... 0 0 \n", " 685000.0 0 ... 0 0 \n", " 695000.0 0 ... 0 0 \n", "\n", " Saltmarsh Urban Suburban Elevation \\\n", "y x \n", "1295000.0 5000.0 0 0 0 -9999 \n", " 15000.0 0 0 0 -9999 \n", " 25000.0 0 0 0 -9999 \n", " 35000.0 0 0 0 -9999 \n", " 45000.0 0 0 0 -9999 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 0 0 -9999 \n", " 665000.0 0 0 0 -9999 \n", " 675000.0 0 0 0 -9999 \n", " 685000.0 0 0 0 -9999 \n", " 695000.0 0 0 0 -9999 \n", "\n", " Cumulative catchment area Surface type \\\n", "y x \n", "1295000.0 5000.0 -9999 -1 \n", " 15000.0 -9999 -1 \n", " 25000.0 -9999 -1 \n", " 35000.0 -9999 -1 \n", " 45000.0 -9999 -1 \n", "... ... ... 
# For each IHDTM file, append its raster data to the main dataframe as one
# feature column named after the IHDTM key.
for key, raster_path in IHDTM.items():
    ihdtm_data = rioxarray.open_rasterio(raster_path)

    # squeeze() removes length-1 dimensions (these rasters are single-band),
    # after which 'band' and 'spatial_ref' are dropped so that to_dataframe()
    # yields only the data column. drop_vars() is the supported replacement
    # for the deprecated DataArray.drop().
    ihdtm_data = ihdtm_data.squeeze().drop_vars(["spatial_ref", "band"])
    ihdtm_data.name = key
    ihdtm_df = ihdtm_data.to_dataframe()

    # Adding 25 to x and y coordinates to match index of other datasets
    ihdtm_df.index = ihdtm_df.index.set_levels(ihdtm_df.index.levels[0] + 25, level=0)
    ihdtm_df.index = ihdtm_df.index.set_levels(ihdtm_df.index.levels[1] + 25, level=1)

    # Remove any copy of this column left by an earlier execution of the cell
    # before joining. Without this, re-running the cell makes join() raise
    # "columns overlap but no suffix specified".
    # NOTE(review): the displayed corner cells hold -9999 / -1 / 255, which
    # look like nodata sentinels rather than measurements — confirm and mask
    # them before modelling.
    main_df = main_df.drop(columns=key, errors="ignore").join(ihdtm_df)
main_df
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...UrbanSuburbanElevationCumulative catchment areaSurface typeOutflowing drainage directionInflowing drainage directionFertiliser KFertiliser NFertiliser P
yx
1295000.05000.00000000000...00-9999-9999-1-1255NaNNaNNaN
15000.00000000000...00-9999-9999-1-1255NaNNaNNaN
25000.00000000000...00-9999-9999-1-1255NaNNaNNaN
35000.00000000000...00-9999-9999-1-1255NaNNaNNaN
45000.00000000000...00-9999-9999-1-1255NaNNaNNaN
.....................................................................
5000.0655000.00000000000...00-9999-9999-1-1255NaNNaNNaN
665000.00000000000...00-9999-9999-1-1255NaNNaNNaN
675000.00000000000...00-9999-9999-1-1255NaNNaNNaN
685000.00000000000...00-9999-9999-1-1255NaNNaNNaN
695000.00000000000...00-9999-9999-1-1255NaNNaNNaN
\n", "

9100 rows × 29 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1295000.0 5000.0 0 0 0 \n", " 15000.0 0 0 0 \n", " 25000.0 0 0 0 \n", " 35000.0 0 0 0 \n", " 45000.0 0 0 0 \n", "... ... ... ... \n", "5000.0 655000.0 0 0 0 \n", " 665000.0 0 0 0 \n", " 675000.0 0 0 0 \n", " 685000.0 0 0 0 \n", " 695000.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1295000.0 5000.0 0 0 \n", " 15000.0 0 0 \n", " 25000.0 0 0 \n", " 35000.0 0 0 \n", " 45000.0 0 0 \n", "... ... ... \n", "5000.0 655000.0 0 0 \n", " 665000.0 0 0 \n", " 675000.0 0 0 \n", " 685000.0 0 0 \n", " 695000.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1295000.0 5000.0 0 0 0 0 \n", " 15000.0 0 0 0 0 \n", " 25000.0 0 0 0 0 \n", " 35000.0 0 0 0 0 \n", " 45000.0 0 0 0 0 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 0 0 0 \n", " 665000.0 0 0 0 0 \n", " 675000.0 0 0 0 0 \n", " 685000.0 0 0 0 0 \n", " 695000.0 0 0 0 0 \n", "\n", " Heather grassland ... Urban Suburban Elevation \\\n", "y x ... \n", "1295000.0 5000.0 0 ... 0 0 -9999 \n", " 15000.0 0 ... 0 0 -9999 \n", " 25000.0 0 ... 0 0 -9999 \n", " 35000.0 0 ... 0 0 -9999 \n", " 45000.0 0 ... 0 0 -9999 \n", "... ... ... ... ... ... \n", "5000.0 655000.0 0 ... 0 0 -9999 \n", " 665000.0 0 ... 0 0 -9999 \n", " 675000.0 0 ... 0 0 -9999 \n", " 685000.0 0 ... 0 0 -9999 \n", " 695000.0 0 ... 0 0 -9999 \n", "\n", " Cumulative catchment area Surface type \\\n", "y x \n", "1295000.0 5000.0 -9999 -1 \n", " 15000.0 -9999 -1 \n", " 25000.0 -9999 -1 \n", " 35000.0 -9999 -1 \n", " 45000.0 -9999 -1 \n", "... ... ... \n", "5000.0 655000.0 -9999 -1 \n", " 665000.0 -9999 -1 \n", " 675000.0 -9999 -1 \n", " 685000.0 -9999 -1 \n", " 695000.0 -9999 -1 \n", "\n", " Outflowing drainage direction \\\n", "y x \n", "1295000.0 5000.0 -1 \n", " 15000.0 -1 \n", " 25000.0 -1 \n", " 35000.0 -1 \n", " 45000.0 -1 \n", "... ... 
# For each fertiliser, append its raster data to the main dataframe.
# Keys are the resulting column names; values are the raster paths.
fertiliser = {'Fertiliser K' : FERTILISER_K_PATH, 'Fertiliser N' : FERTILISER_N_PATH, 'Fertiliser P' : FERTILISER_P_PATH}

for key, raster_path in fertiliser.items():
    fert_dataset = rioxarray.open_rasterio(raster_path)
    fert_dataset.name = key
    fert_df = fert_dataset.to_dataframe().drop(columns='spatial_ref')

    # Discard the rows of band 2 and then the band index level, leaving a
    # (y, x)-indexed frame with a single column.
    # NOTE(review): going by the file names ("prediction_uncertainty"),
    # band 2 is presumably the uncertainty estimate — confirm.
    fert_df = fert_df.drop(index=2).droplevel('band')

    # Shift the coordinates so they line up with the main dataframe's index.
    # NOTE(review): the offsets (-1000 on y, -5000 on x) are unexplained magic
    # numbers; verify them against the raster's georeferencing.
    fert_df.index = fert_df.index.set_levels(fert_df.index.levels[0] - 1000, level=0)
    fert_df.index = fert_df.index.set_levels(fert_df.index.levels[1] - 5000, level=1)

    # Remove any copy of this column left by an earlier execution of the cell
    # before joining. Without this, re-running the cell makes join() raise
    # "columns overlap but no suffix specified".
    main_df = main_df.drop(columns=key, errors='ignore').join(fert_df)
main_df
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Chlorothalonil_10kmGlyphosate_10kmMancozeb_10kmMecoprop-P_10kmMetamitron_10kmPendimethalin_10kmPropamocarbHydrochloride_10kmProsulfocarb_10kmSulphur_10kmTri-allate_10km
yx
1295000.05000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
15000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
25000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
45000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
.....................................................................
5000.0655000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
665000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
675000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
685000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
695000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

9100 rows × 39 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1295000.0 5000.0 0 0 0 \n", " 15000.0 0 0 0 \n", " 25000.0 0 0 0 \n", " 35000.0 0 0 0 \n", " 45000.0 0 0 0 \n", "... ... ... ... \n", "5000.0 655000.0 0 0 0 \n", " 665000.0 0 0 0 \n", " 675000.0 0 0 0 \n", " 685000.0 0 0 0 \n", " 695000.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1295000.0 5000.0 0 0 \n", " 15000.0 0 0 \n", " 25000.0 0 0 \n", " 35000.0 0 0 \n", " 45000.0 0 0 \n", "... ... ... \n", "5000.0 655000.0 0 0 \n", " 665000.0 0 0 \n", " 675000.0 0 0 \n", " 685000.0 0 0 \n", " 695000.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1295000.0 5000.0 0 0 0 0 \n", " 15000.0 0 0 0 0 \n", " 25000.0 0 0 0 0 \n", " 35000.0 0 0 0 0 \n", " 45000.0 0 0 0 0 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 0 0 0 \n", " 665000.0 0 0 0 0 \n", " 675000.0 0 0 0 0 \n", " 685000.0 0 0 0 0 \n", " 695000.0 0 0 0 0 \n", "\n", " Heather grassland ... Chlorothalonil_10km \\\n", "y x ... \n", "1295000.0 5000.0 0 ... NaN \n", " 15000.0 0 ... NaN \n", " 25000.0 0 ... NaN \n", " 35000.0 0 ... NaN \n", " 45000.0 0 ... NaN \n", "... ... ... ... \n", "5000.0 655000.0 0 ... NaN \n", " 665000.0 0 ... NaN \n", " 675000.0 0 ... NaN \n", " 685000.0 0 ... NaN \n", " 695000.0 0 ... NaN \n", "\n", " Glyphosate_10km Mancozeb_10km Mecoprop-P_10km \\\n", "y x \n", "1295000.0 5000.0 NaN NaN NaN \n", " 15000.0 NaN NaN NaN \n", " 25000.0 NaN NaN NaN \n", " 35000.0 NaN NaN NaN \n", " 45000.0 NaN NaN NaN \n", "... ... ... ... \n", "5000.0 655000.0 NaN NaN NaN \n", " 665000.0 NaN NaN NaN \n", " 675000.0 NaN NaN NaN \n", " 685000.0 NaN NaN NaN \n", " 695000.0 NaN NaN NaN \n", "\n", " Metamitron_10km Pendimethalin_10km \\\n", "y x \n", "1295000.0 5000.0 NaN NaN \n", " 15000.0 NaN NaN \n", " 25000.0 NaN NaN \n", " 35000.0 NaN NaN \n", " 45000.0 NaN NaN \n", "... ... ... 
# For each .tif file in the pesticide folder, append its raster data to the
# main dataframe. pest_dict keeps the per-pesticide frames for inspection.
pest_dict = {}

# os.listdir() returns entries in arbitrary order; sorting makes the order of
# the appended columns deterministic across machines and runs.
for filename in sorted(os.listdir(PESTICIDE_FOLDER_PATH)):
    if not filename.endswith('.tif'):
        continue

    pest_dataset = rioxarray.open_rasterio(os.path.join(PESTICIDE_FOLDER_PATH, filename))
    # The column is named after the file, without its '.tif' extension.
    pest_name = os.path.splitext(filename)[0]
    pest_dataset.name = pest_name
    pest_df = pest_dataset.to_dataframe().drop(columns='spatial_ref')

    # Discard the rows of band 2 and then the band index level, leaving a
    # (y, x)-indexed frame with a single column.
    pest_df = pest_df.drop(index=2).droplevel('band')

    # The stored object is the same one whose index is re-assigned below, so
    # the entry in pest_dict reflects the shifted coordinates as well.
    pest_dict[pest_name] = pest_df

    if not main_df.index.equals(pest_df.index):
        # Offsets that move the last row's y and the first row's x onto 5000,
        # the value at which the main dataframe's displayed index has its
        # smallest y and x.
        y_diff = pest_df.index[-1][0] - 5000
        x_diff = pest_df.index[0][1] - 5000

        pest_df.index = pest_df.index.set_levels(pest_df.index.levels[0] - y_diff, level=0)
        pest_df.index = pest_df.index.set_levels(pest_df.index.levels[1] - x_diff, level=1)

    # Remove any copy of this column left by an earlier execution of the cell
    # before joining. Without this, re-running the cell makes join() raise
    # "columns overlap but no suffix specified".
    # NOTE(review): the per-pesticide frames contain values around -3.4e38,
    # which look like a nodata sentinel; consider
    # open_rasterio(..., masked=True) or an explicit mask — confirm against
    # the raster metadata first.
    main_df = main_df.drop(columns=pest_name, errors='ignore').join(pest_df)
main_df
"outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Chlorothalonil_10km
yx
1205000.05000.0-3.400000e+38
15000.0-3.400000e+38
25000.0-3.400000e+38
35000.0-3.400000e+38
45000.0-3.400000e+38
.........
5000.0555000.0-3.400000e+38
565000.0-3.400000e+38
575000.0-3.400000e+38
585000.0-3.400000e+38
595000.0-3.400000e+38
\n", "

7260 rows × 1 columns

\n", "
# Display the frame stored for a single pesticide raster.
# NOTE(review): the displayed values (about -3.4e38) look like a nodata
# sentinel rather than real application rates — confirm and mask before use.
pest_dict['Chlorothalonil_10km']
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Glyphosate_10kmMancozeb_10kmMecoprop-P_10kmMetamitron_10kmPendimethalin_10kmPropamocarbHydrochloride_10kmProsulfocarb_10kmSulphur_10kmTri-allate_10kmOccurrence
yx
1295000.05000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
15000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
25000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
35000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
45000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
.....................................................................
5000.0655000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
665000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
675000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
685000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
695000.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
\n", "

9100 rows × 40 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1295000.0 5000.0 0 0 0 \n", " 15000.0 0 0 0 \n", " 25000.0 0 0 0 \n", " 35000.0 0 0 0 \n", " 45000.0 0 0 0 \n", "... ... ... ... \n", "5000.0 655000.0 0 0 0 \n", " 665000.0 0 0 0 \n", " 675000.0 0 0 0 \n", " 685000.0 0 0 0 \n", " 695000.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1295000.0 5000.0 0 0 \n", " 15000.0 0 0 \n", " 25000.0 0 0 \n", " 35000.0 0 0 \n", " 45000.0 0 0 \n", "... ... ... \n", "5000.0 655000.0 0 0 \n", " 665000.0 0 0 \n", " 675000.0 0 0 \n", " 685000.0 0 0 \n", " 695000.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1295000.0 5000.0 0 0 0 0 \n", " 15000.0 0 0 0 0 \n", " 25000.0 0 0 0 0 \n", " 35000.0 0 0 0 0 \n", " 45000.0 0 0 0 0 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 0 0 0 \n", " 665000.0 0 0 0 0 \n", " 675000.0 0 0 0 0 \n", " 685000.0 0 0 0 0 \n", " 695000.0 0 0 0 0 \n", "\n", " Heather grassland ... Glyphosate_10km Mancozeb_10km \\\n", "y x ... \n", "1295000.0 5000.0 0 ... NaN NaN \n", " 15000.0 0 ... NaN NaN \n", " 25000.0 0 ... NaN NaN \n", " 35000.0 0 ... NaN NaN \n", " 45000.0 0 ... NaN NaN \n", "... ... ... ... ... \n", "5000.0 655000.0 0 ... NaN NaN \n", " 665000.0 0 ... NaN NaN \n", " 675000.0 0 ... NaN NaN \n", " 685000.0 0 ... NaN NaN \n", " 695000.0 0 ... NaN NaN \n", "\n", " Mecoprop-P_10km Metamitron_10km Pendimethalin_10km \\\n", "y x \n", "1295000.0 5000.0 NaN NaN NaN \n", " 15000.0 NaN NaN NaN \n", " 25000.0 NaN NaN NaN \n", " 35000.0 NaN NaN NaN \n", " 45000.0 NaN NaN NaN \n", "... ... ... ... \n", "5000.0 655000.0 NaN NaN NaN \n", " 665000.0 NaN NaN NaN \n", " 675000.0 NaN NaN NaN \n", " 685000.0 NaN NaN NaN \n", " 695000.0 NaN NaN NaN \n", "\n", " PropamocarbHydrochloride_10km Prosulfocarb_10km \\\n", "y x \n", "1295000.0 5000.0 NaN NaN \n", " 15000.0 NaN NaN \n", " 25000.0 NaN NaN \n", " 35000.0 NaN NaN \n", " 45000.0 NaN NaN \n", "... ... ... 
\n", "5000.0 655000.0 NaN NaN \n", " 665000.0 NaN NaN \n", " 675000.0 NaN NaN \n", " 685000.0 NaN NaN \n", " 695000.0 NaN NaN \n", "\n", " Sulphur_10km Tri-allate_10km Occurrence \n", "y x \n", "1295000.0 5000.0 NaN NaN 0 \n", " 15000.0 NaN NaN 0 \n", " 25000.0 NaN NaN 0 \n", " 35000.0 NaN NaN 0 \n", " 45000.0 NaN NaN 0 \n", "... ... ... ... \n", "5000.0 655000.0 NaN NaN 0 \n", " 665000.0 NaN NaN 0 \n", " 675000.0 NaN NaN 0 \n", " 685000.0 NaN NaN 0 \n", " 695000.0 NaN NaN 0 \n", "\n", "[9100 rows x 40 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Label data with entire bird dataset\n", "# -> Cells whose raster value is -1 (presumably the raster's no-data value - TODO confirm)\n", "#    are labelled 0; every other value is labelled 1\n", "\n", "bird_dataset = rioxarray.open_rasterio(INVASIVE_BIRDS_PATH)\n", "bird_dataset.name = 'data'\n", "# drop_vars replaces the deprecated DataArray.drop for removing coordinates\n", "bird_df = bird_dataset.squeeze().drop_vars(['spatial_ref', 'band']).to_dataframe()\n", "\n", "# Vectorised equivalent of a per-row `0 if x == -1 else 1`\n", "bird_df['Occurrence'] = bird_df['data'].ne(-1).astype('int64')\n", "# Drop any label left over from an earlier run so re-executing this cell\n", "# does not fail with a column-overlap error in join\n", "main_df = main_df.drop(columns='Occurrence', errors='ignore').join(bird_df[['Occurrence']])\n", "main_df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Occurrence\n", "0 6428\n", "1 2672\n", "dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Checking to see values\n", "main_df.value_counts('Occurrence')\n", "\n", "# Needs to be cleaned as there are too many rows with no data" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Cleaning data\n", "# main_df = main_df.loc[main_df['Deciduous woodland']\n", "# + main_df['Coniferous woodland']\n", "# + main_df['Arable']\n", "# + main_df['Improve grassland']\n", "# + main_df['Neutral grassland']\n", "# + main_df['Calcareous grassland']\n", "# + main_df['Acid grassland']\n", "# + main_df['Fen']\n", "# + main_df['Heather']\n", "# + main_df['Heather grassland']\n", "# + main_df['Bog']\n", "# + main_df['Inland 
rock']\n", "# + main_df['Saltwater']\n", "# + main_df['Freshwater']\n", "# + main_df['Supralittoral rock']\n", "# + main_df['Supralittoral sediment']\n", "# + main_df['Littoral rock']\n", "# + main_df['Littoral sediment']\n", "# + main_df['Saltmarsh']\n", "# + main_df['Urban']\n", "# + main_df['Suburban'] != 0\n", "# ]\n", "# main_df.value_counts('Occurrence')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Fertiliser K', 'Fertiliser N', 'Fertiliser P', 'Chlorothalonil_10km', 'Glyphosate_10km', 'Mancozeb_10km', 'Mecoprop-P_10km', 'Metamitron_10km', 'Pendimethalin_10km', 'PropamocarbHydrochloride_10km', 'Prosulfocarb_10km', 'Sulphur_10km', 'Tri-allate_10km']\n" ] } ], "source": [ "# Show columns with null values\n", "# isna().any() gives one boolean per column; use it to pick the column labels in frame order\n", "nan_columns = main_df.columns[main_df.isna().any()].tolist()\n", "print(nan_columns)\n", "\n", "# Only the pesticide and fertiliser columns have null values\n", "# Only in the 10km cell exists a single cell where fertiliser is NaN -> (index no. 
1616)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fertiliser K Min: -3.4028230607370965e+38\n", "Fertiliser N Min: -3.4028230607370965e+38\n", "Fertiliser P Min: -3.4028230607370965e+38\n", "Chlorothalonil_10km Min: -3.3999999521443642e+38\n", "Glyphosate_10km Min: -3.3999999521443642e+38\n", "Mancozeb_10km Min: -3.3999999521443642e+38\n", "Mecoprop-P_10km Min: -3.3999999521443642e+38\n", "Metamitron_10km Min: -3.3999999521443642e+38\n", "Pendimethalin_10km Min: -3.3999999521443642e+38\n", "PropamocarbHydrochloride_10km Min: -3.3999999521443642e+38\n", "Prosulfocarb_10km Min: -3.3999999521443642e+38\n", "Sulphur_10km Min: -3.3999999521443642e+38\n", "Tri-allate_10km Min: -3.3999999521443642e+38\n" ] } ], "source": [ "# Replace null values in each column with that column's own minimum value.\n", "# In the recorded run every minimum is about -3.4e+38, which looks like the rasters'\n", "# no-data sentinel (TODO confirm), so missing cells end up encoded like existing no-data cells.\n", "for column in main_df.columns[main_df.isna().any()]:\n", "    # Compute the minimum once and reuse it for both the log line and the fill\n", "    column_min = np.nanmin(main_df[column])\n", "    print(f'{column} Min: {column_min}')\n", "    main_df[column] = main_df[column].fillna(column_min)\n", "\n", "# old value: -3.4e+38" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Glyphosate_10kmMancozeb_10kmMecoprop-P_10kmMetamitron_10kmPendimethalin_10kmPropamocarbHydrochloride_10kmProsulfocarb_10kmSulphur_10kmTri-allate_10kmOccurrence
yx
1295000.05000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
15000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
25000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
35000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
45000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
.....................................................................
5000.0655000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
665000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
675000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
685000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
695000.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
\n", "

9100 rows × 40 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1295000.0 5000.0 0 0 0 \n", " 15000.0 0 0 0 \n", " 25000.0 0 0 0 \n", " 35000.0 0 0 0 \n", " 45000.0 0 0 0 \n", "... ... ... ... \n", "5000.0 655000.0 0 0 0 \n", " 665000.0 0 0 0 \n", " 675000.0 0 0 0 \n", " 685000.0 0 0 0 \n", " 695000.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1295000.0 5000.0 0 0 \n", " 15000.0 0 0 \n", " 25000.0 0 0 \n", " 35000.0 0 0 \n", " 45000.0 0 0 \n", "... ... ... \n", "5000.0 655000.0 0 0 \n", " 665000.0 0 0 \n", " 675000.0 0 0 \n", " 685000.0 0 0 \n", " 695000.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1295000.0 5000.0 0 0 0 0 \n", " 15000.0 0 0 0 0 \n", " 25000.0 0 0 0 0 \n", " 35000.0 0 0 0 0 \n", " 45000.0 0 0 0 0 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 0 0 0 \n", " 665000.0 0 0 0 0 \n", " 675000.0 0 0 0 0 \n", " 685000.0 0 0 0 0 \n", " 695000.0 0 0 0 0 \n", "\n", " Heather grassland ... Glyphosate_10km Mancozeb_10km \\\n", "y x ... \n", "1295000.0 5000.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 15000.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 25000.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 35000.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 45000.0 0 ... -3.400000e+38 -3.400000e+38 \n", "... ... ... ... ... \n", "5000.0 655000.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 665000.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 675000.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 685000.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 695000.0 0 ... -3.400000e+38 -3.400000e+38 \n", "\n", " Mecoprop-P_10km Metamitron_10km Pendimethalin_10km \\\n", "y x \n", "1295000.0 5000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 15000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 25000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 35000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 45000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", "... ... ... ... 
\n", "5000.0 655000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 665000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 675000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 685000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 695000.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", "\n", " PropamocarbHydrochloride_10km Prosulfocarb_10km \\\n", "y x \n", "1295000.0 5000.0 -3.400000e+38 -3.400000e+38 \n", " 15000.0 -3.400000e+38 -3.400000e+38 \n", " 25000.0 -3.400000e+38 -3.400000e+38 \n", " 35000.0 -3.400000e+38 -3.400000e+38 \n", " 45000.0 -3.400000e+38 -3.400000e+38 \n", "... ... ... \n", "5000.0 655000.0 -3.400000e+38 -3.400000e+38 \n", " 665000.0 -3.400000e+38 -3.400000e+38 \n", " 675000.0 -3.400000e+38 -3.400000e+38 \n", " 685000.0 -3.400000e+38 -3.400000e+38 \n", " 695000.0 -3.400000e+38 -3.400000e+38 \n", "\n", " Sulphur_10km Tri-allate_10km Occurrence \n", "y x \n", "1295000.0 5000.0 -3.400000e+38 -3.400000e+38 0 \n", " 15000.0 -3.400000e+38 -3.400000e+38 0 \n", " 25000.0 -3.400000e+38 -3.400000e+38 0 \n", " 35000.0 -3.400000e+38 -3.400000e+38 0 \n", " 45000.0 -3.400000e+38 -3.400000e+38 0 \n", "... ... ... ... 
\n", "5000.0 655000.0 -3.400000e+38 -3.400000e+38 0 \n", " 665000.0 -3.400000e+38 -3.400000e+38 0 \n", " 675000.0 -3.400000e+38 -3.400000e+38 0 \n", " 685000.0 -3.400000e+38 -3.400000e+38 0 \n", " 695000.0 -3.400000e+38 -3.400000e+38 0 \n", "\n", "[9100 rows x 40 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "main_df" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Output dataframe as csv\n", "OUTPUT_CSV_PATH = 'Datasets/Machine Learning/Dataframes/10km_All_Birds_DF.csv'\n", "# to_csv does not create missing folders, so make sure the target directory exists first\n", "os.makedirs(os.path.dirname(OUTPUT_CSV_PATH), exist_ok=True)\n", "main_df.to_csv(OUTPUT_CSV_PATH)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.13 ('env': venv)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "f025c48a9b67ab76bdc0400dfa0f9ba99120976b4a6ec6a63d1c946516165c91" } } }, "nbformat": 4, "nbformat_minor": 2 }