{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import rioxarray\n", "import os\n", "import numpy as np\n", "\n", "\n", "# <<< INVASIVE SPECIES MAP >>>\n", "INVASIVE_BIRDS_PATH = 'Datasets/Machine Learning/5km Rasters/Birds/All_Invasive_Birds_5km.tif'\n", "\n", "# <<< LAND COVER MAP >>>\n", "# Dimensions: 700000x1300000\n", "LAND_COVER_MAP_PATH = 'Datasets/Machine Learning/5km Rasters/Features/gb2021lcm5km_percentage_target.tif'\n", "\n", "# <<< FERTILISER >>>\n", "# The dataset consists of maps of the predicted average annual application rates (2010-2015) of three different inorganic \n", "# chemical fertilisers – nitrogen (N), phosphorus (P) and potassium (K) – in England across a six-year period, along with \n", "# their respective estimates of uncertainty, at a 1 km x 1 km resolution. \n", "FERTILISER_K_PATH = 'Datasets/Machine Learning/5km Rasters/Features/fertiliser_k_prediction_uncertainty_5km.tif'\n", "FERTILISER_N_PATH = 'Datasets/Machine Learning/5km Rasters/Features/fertiliser_n_prediction_uncertainty_5km.tif'\n", "FERTILISER_P_PATH = 'Datasets/Machine Learning/5km Rasters/Features/fertiliser_p_prediction_uncertainty_5km.tif'\n", "\n", "# <<< PESTICIDE >>>\n", "PESTICIDE_FOLDER_PATH = 'Datasets/Machine Learning/5km Rasters/Features/Pesticides/'\n", "\n", "# <<< INTEGRATED HYDROLOGICAL DIGITAL TERRAIN MODEL >>>\n", "# Dimensions: 700000x1300000\n", "# These datasets all only have one band\n", "ELEVATION_PATH = 'Datasets/Machine Learning/5km Rasters/Features/HGHT_5km.tif'\n", "CUMULATIVE_CATCHMENT_AREA_PATH = 'Datasets/Machine Learning/5km Rasters/Features/CCAR_5km.tif'\n", "SURFACE_TYPE_PATH = 'Datasets/Machine Learning/5km Rasters/Features/SURF_5km.tif'\n", "OUTFLOWING_DRAINAGE_DIRECTION_PATH = 'Datasets/Machine Learning/5km Rasters/Features/OUTF_5km.tif'\n", "INFLOWING_DRAINAGE_PATTERN_PATH = 'Datasets/Machine Learning/5km Rasters/Features/INFL_5km.tif'\n", "\n", "IHDTM = {\n", " 'Elevation': ELEVATION_PATH, \n", " 'Cumulative catchment area': CUMULATIVE_CATCHMENT_AREA_PATH, \n", " 'Surface type': SURFACE_TYPE_PATH, \n", " 'Outflowing drainage direction': OUTFLOWING_DRAINAGE_DIRECTION_PATH, \n", " 'Inflowing drainage direction': INFLOWING_DRAINAGE_PATTERN_PATH\n", "}\n", "\n", "\n", "# <<< Opening in rasterio >>>\n", "# dataset = rasterio.open(FERTILISER_P_PATH)\n", "# data = dataset.read()\n", "# np.max(data)\n", "\n", "# <<< Opening in rioxarray >>>\n", "# dataset = rioxarray.open_rasterio(ELEVATION_PATH)\n", "# dataset.name = 'data'\n", "# df = dataset.to_dataframe()\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "data\n", "0 728295\n", "1 4899\n", "2 2811\n", "3 1994\n", "4 1576\n", " ... \n", "83 120\n", "88 120\n", "82 117\n", "93 108\n", "94 100\n", "Length: 101, dtype: int64\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bandDeciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Inland rockSaltwaterFreshwaterSupralittoral rockSupralittoral sedimentLittoral rockLittoral sedimentSaltmarshUrbanSuburban
yx
1297500.02500.00000000000...0000000000
7500.00000000000...0000000000
12500.00000000000...0000000000
17500.00000000000...0000000000
22500.00000000000...0000000000
.....................................................................
2500.0677500.00000000000...0000000000
682500.00000000000...0000000000
687500.00000000000...0000000000
692500.00000000000...0000000000
697500.00000000000...0000000000
\n", "

36400 rows × 21 columns

\n", "
" ], "text/plain": [ "band Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1297500.0 2500.0 0 0 0 \n", " 7500.0 0 0 0 \n", " 12500.0 0 0 0 \n", " 17500.0 0 0 0 \n", " 22500.0 0 0 0 \n", "... ... ... ... \n", "2500.0 677500.0 0 0 0 \n", " 682500.0 0 0 0 \n", " 687500.0 0 0 0 \n", " 692500.0 0 0 0 \n", " 697500.0 0 0 0 \n", "\n", "band Improve grassland Neutral grassland \\\n", "y x \n", "1297500.0 2500.0 0 0 \n", " 7500.0 0 0 \n", " 12500.0 0 0 \n", " 17500.0 0 0 \n", " 22500.0 0 0 \n", "... ... ... \n", "2500.0 677500.0 0 0 \n", " 682500.0 0 0 \n", " 687500.0 0 0 \n", " 692500.0 0 0 \n", " 697500.0 0 0 \n", "\n", "band Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1297500.0 2500.0 0 0 0 0 \n", " 7500.0 0 0 0 0 \n", " 12500.0 0 0 0 0 \n", " 17500.0 0 0 0 0 \n", " 22500.0 0 0 0 0 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 0 0 0 \n", " 682500.0 0 0 0 0 \n", " 687500.0 0 0 0 0 \n", " 692500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", "\n", "band Heather grassland ... Inland rock Saltwater \\\n", "y x ... \n", "1297500.0 2500.0 0 ... 0 0 \n", " 7500.0 0 ... 0 0 \n", " 12500.0 0 ... 0 0 \n", " 17500.0 0 ... 0 0 \n", " 22500.0 0 ... 0 0 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 ... 0 0 \n", " 682500.0 0 ... 0 0 \n", " 687500.0 0 ... 0 0 \n", " 692500.0 0 ... 0 0 \n", " 697500.0 0 ... 0 0 \n", "\n", "band Freshwater Supralittoral rock Supralittoral sediment \\\n", "y x \n", "1297500.0 2500.0 0 0 0 \n", " 7500.0 0 0 0 \n", " 12500.0 0 0 0 \n", " 17500.0 0 0 0 \n", " 22500.0 0 0 0 \n", "... ... ... ... \n", "2500.0 677500.0 0 0 0 \n", " 682500.0 0 0 0 \n", " 687500.0 0 0 0 \n", " 692500.0 0 0 0 \n", " 697500.0 0 0 0 \n", "\n", "band Littoral rock Littoral sediment Saltmarsh Urban \\\n", "y x \n", "1297500.0 2500.0 0 0 0 0 \n", " 7500.0 0 0 0 0 \n", " 12500.0 0 0 0 0 \n", " 17500.0 0 0 0 0 \n", " 22500.0 0 0 0 0 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 0 0 0 \n", " 682500.0 0 0 0 0 \n", " 687500.0 0 0 0 0 \n", " 692500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", "\n", "band Suburban \n", "y x \n", "1297500.0 2500.0 0 \n", " 7500.0 0 \n", " 12500.0 0 \n", " 17500.0 0 \n", " 22500.0 0 \n", "... ... \n", "2500.0 677500.0 0 \n", " 682500.0 0 \n", " 687500.0 0 \n", " 692500.0 0 \n", " 697500.0 0 \n", "\n", "[36400 rows x 21 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Open the land cover map raster file, transform its bands into feature columns and use this as the base for the main dataframe\n", "# as this has the widest extent out of all the files and is arguably one of the most important dataframes\n", "\n", "lcm = rioxarray.open_rasterio(LAND_COVER_MAP_PATH)\n", "lcm.name = 'data'\n", "main_df = lcm.to_dataframe().drop(columns='spatial_ref')\n", "print(main_df.value_counts())\n", "main_df = main_df.unstack(level='band')\n", "\n", "LCM_CLASSES = [\n", " 'Deciduous woodland', \n", " 'Coniferous woodland', \n", " 'Arable', \n", " 'Improve grassland', \n", " 'Neutral grassland', \n", " 'Calcareous grassland', \n", " 'Acid grassland', \n", " 'Fen', \n", " 'Heather', \n", " 'Heather grassland', \n", " 'Bog',\n", " 'Inland rock', \n", " 'Saltwater',\n", " 'Freshwater',\n", " 'Supralittoral rock',\n", " 'Supralittoral sediment',\n", " 'Littoral rock',\n", " 'Littoral sediment',\n", " 'Saltmarsh',\n", " 'Urban',\n", " 'Suburban'\n", " ]\n", "\n", "main_df = main_df['data'].rename(columns={i+1: j for i, j in enumerate(LCM_CLASSES)})\n", "main_df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Littoral rockLittoral sedimentSaltmarshUrbanSuburbanElevationCumulative catchment areaSurface typeOutflowing drainage directionInflowing drainage direction
yx
1297500.02500.00000000000...00000-9999-9999-1-1255
7500.00000000000...00000-9999-9999-1-1255
12500.00000000000...00000-9999-9999-1-1255
17500.00000000000...00000-9999-9999-1-1255
22500.00000000000...00000-9999-9999-1-1255
.....................................................................
2500.0677500.00000000000...00000-9999-9999-1-1255
682500.00000000000...00000-9999-9999-1-1255
687500.00000000000...00000-9999-9999-1-1255
692500.00000000000...00000-9999-9999-1-1255
697500.00000000000...00000-9999-9999-1-1255
\n", "

36400 rows × 26 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1297500.0 2500.0 0 0 0 \n", " 7500.0 0 0 0 \n", " 12500.0 0 0 0 \n", " 17500.0 0 0 0 \n", " 22500.0 0 0 0 \n", "... ... ... ... \n", "2500.0 677500.0 0 0 0 \n", " 682500.0 0 0 0 \n", " 687500.0 0 0 0 \n", " 692500.0 0 0 0 \n", " 697500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1297500.0 2500.0 0 0 \n", " 7500.0 0 0 \n", " 12500.0 0 0 \n", " 17500.0 0 0 \n", " 22500.0 0 0 \n", "... ... ... \n", "2500.0 677500.0 0 0 \n", " 682500.0 0 0 \n", " 687500.0 0 0 \n", " 692500.0 0 0 \n", " 697500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1297500.0 2500.0 0 0 0 0 \n", " 7500.0 0 0 0 0 \n", " 12500.0 0 0 0 0 \n", " 17500.0 0 0 0 0 \n", " 22500.0 0 0 0 0 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 0 0 0 \n", " 682500.0 0 0 0 0 \n", " 687500.0 0 0 0 0 \n", " 692500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", "\n", " Heather grassland ... Littoral rock Littoral sediment \\\n", "y x ... \n", "1297500.0 2500.0 0 ... 0 0 \n", " 7500.0 0 ... 0 0 \n", " 12500.0 0 ... 0 0 \n", " 17500.0 0 ... 0 0 \n", " 22500.0 0 ... 0 0 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 ... 0 0 \n", " 682500.0 0 ... 0 0 \n", " 687500.0 0 ... 0 0 \n", " 692500.0 0 ... 0 0 \n", " 697500.0 0 ... 0 0 \n", "\n", " Saltmarsh Urban Suburban Elevation \\\n", "y x \n", "1297500.0 2500.0 0 0 0 -9999 \n", " 7500.0 0 0 0 -9999 \n", " 12500.0 0 0 0 -9999 \n", " 17500.0 0 0 0 -9999 \n", " 22500.0 0 0 0 -9999 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 0 0 -9999 \n", " 682500.0 0 0 0 -9999 \n", " 687500.0 0 0 0 -9999 \n", " 692500.0 0 0 0 -9999 \n", " 697500.0 0 0 0 -9999 \n", "\n", " Cumulative catchment area Surface type \\\n", "y x \n", "1297500.0 2500.0 -9999 -1 \n", " 7500.0 -9999 -1 \n", " 12500.0 -9999 -1 \n", " 17500.0 -9999 -1 \n", " 22500.0 -9999 -1 \n", "... ... ... \n", "2500.0 677500.0 -9999 -1 \n", " 682500.0 -9999 -1 \n", " 687500.0 -9999 -1 \n", " 692500.0 -9999 -1 \n", " 697500.0 -9999 -1 \n", "\n", " Outflowing drainage direction \\\n", "y x \n", "1297500.0 2500.0 -1 \n", " 7500.0 -1 \n", " 12500.0 -1 \n", " 17500.0 -1 \n", " 22500.0 -1 \n", "... ... \n", "2500.0 677500.0 -1 \n", " 682500.0 -1 \n", " 687500.0 -1 \n", " 692500.0 -1 \n", " 697500.0 -1 \n", "\n", " Inflowing drainage direction \n", "y x \n", "1297500.0 2500.0 255 \n", " 7500.0 255 \n", " 12500.0 255 \n", " 17500.0 255 \n", " 22500.0 255 \n", "... ... \n", "2500.0 677500.0 255 \n", " 682500.0 255 \n", " 687500.0 255 \n", " 692500.0 255 \n", " 697500.0 255 \n", "\n", "[36400 rows x 26 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# For each IHDTM file, append its raster data to the main dataframe\n", "for key in IHDTM:\n", " ihdtm_data = rioxarray.open_rasterio(IHDTM[key])\n", " ihdtm_data = ihdtm_data.squeeze().drop(\"spatial_ref\").drop(\"band\")\n", " ihdtm_data.name = key\n", " ihdtm_df = ihdtm_data.to_dataframe()\n", "\n", " # Adding 25 to x and y coordinates to match index of other datasets\n", " ihdtm_df.index = ihdtm_df.index.set_levels(ihdtm_df.index.levels[0]+25, level=0)\n", " ihdtm_df.index = ihdtm_df.index.set_levels(ihdtm_df.index.levels[1]+25, level=1)\n", " main_df = main_df.join(ihdtm_df)\n", "main_df" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...UrbanSuburbanElevationCumulative catchment areaSurface typeOutflowing drainage directionInflowing drainage directionFertiliser KFertiliser NFertiliser P
yx
1297500.02500.00000000000...00-9999-9999-1-1255NaNNaNNaN
7500.00000000000...00-9999-9999-1-1255NaNNaNNaN
12500.00000000000...00-9999-9999-1-1255NaNNaNNaN
17500.00000000000...00-9999-9999-1-1255NaNNaNNaN
22500.00000000000...00-9999-9999-1-1255NaNNaNNaN
.....................................................................
2500.0677500.00000000000...00-9999-9999-1-1255NaNNaNNaN
682500.00000000000...00-9999-9999-1-1255NaNNaNNaN
687500.00000000000...00-9999-9999-1-1255NaNNaNNaN
692500.00000000000...00-9999-9999-1-1255NaNNaNNaN
697500.00000000000...00-9999-9999-1-1255NaNNaNNaN
\n", "

36400 rows × 29 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1297500.0 2500.0 0 0 0 \n", " 7500.0 0 0 0 \n", " 12500.0 0 0 0 \n", " 17500.0 0 0 0 \n", " 22500.0 0 0 0 \n", "... ... ... ... \n", "2500.0 677500.0 0 0 0 \n", " 682500.0 0 0 0 \n", " 687500.0 0 0 0 \n", " 692500.0 0 0 0 \n", " 697500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1297500.0 2500.0 0 0 \n", " 7500.0 0 0 \n", " 12500.0 0 0 \n", " 17500.0 0 0 \n", " 22500.0 0 0 \n", "... ... ... \n", "2500.0 677500.0 0 0 \n", " 682500.0 0 0 \n", " 687500.0 0 0 \n", " 692500.0 0 0 \n", " 697500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1297500.0 2500.0 0 0 0 0 \n", " 7500.0 0 0 0 0 \n", " 12500.0 0 0 0 0 \n", " 17500.0 0 0 0 0 \n", " 22500.0 0 0 0 0 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 0 0 0 \n", " 682500.0 0 0 0 0 \n", " 687500.0 0 0 0 0 \n", " 692500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", "\n", " Heather grassland ... Urban Suburban Elevation \\\n", "y x ... \n", "1297500.0 2500.0 0 ... 0 0 -9999 \n", " 7500.0 0 ... 0 0 -9999 \n", " 12500.0 0 ... 0 0 -9999 \n", " 17500.0 0 ... 0 0 -9999 \n", " 22500.0 0 ... 0 0 -9999 \n", "... ... ... ... ... ... \n", "2500.0 677500.0 0 ... 0 0 -9999 \n", " 682500.0 0 ... 0 0 -9999 \n", " 687500.0 0 ... 0 0 -9999 \n", " 692500.0 0 ... 0 0 -9999 \n", " 697500.0 0 ... 0 0 -9999 \n", "\n", " Cumulative catchment area Surface type \\\n", "y x \n", "1297500.0 2500.0 -9999 -1 \n", " 7500.0 -9999 -1 \n", " 12500.0 -9999 -1 \n", " 17500.0 -9999 -1 \n", " 22500.0 -9999 -1 \n", "... ... ... \n", "2500.0 677500.0 -9999 -1 \n", " 682500.0 -9999 -1 \n", " 687500.0 -9999 -1 \n", " 692500.0 -9999 -1 \n", " 697500.0 -9999 -1 \n", "\n", " Outflowing drainage direction \\\n", "y x \n", "1297500.0 2500.0 -1 \n", " 7500.0 -1 \n", " 12500.0 -1 \n", " 17500.0 -1 \n", " 22500.0 -1 \n", "... ... \n", "2500.0 677500.0 -1 \n", " 682500.0 -1 \n", " 687500.0 -1 \n", " 692500.0 -1 \n", " 697500.0 -1 \n", "\n", " Inflowing drainage direction Fertiliser K Fertiliser N \\\n", "y x \n", "1297500.0 2500.0 255 NaN NaN \n", " 7500.0 255 NaN NaN \n", " 12500.0 255 NaN NaN \n", " 17500.0 255 NaN NaN \n", " 22500.0 255 NaN NaN \n", "... ... ... ... \n", "2500.0 677500.0 255 NaN NaN \n", " 682500.0 255 NaN NaN \n", " 687500.0 255 NaN NaN \n", " 692500.0 255 NaN NaN \n", " 697500.0 255 NaN NaN \n", "\n", " Fertiliser P \n", "y x \n", "1297500.0 2500.0 NaN \n", " 7500.0 NaN \n", " 12500.0 NaN \n", " 17500.0 NaN \n", " 22500.0 NaN \n", "... ... \n", "2500.0 677500.0 NaN \n", " 682500.0 NaN \n", " 687500.0 NaN \n", " 692500.0 NaN \n", " 697500.0 NaN \n", "\n", "[36400 rows x 29 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# For each fertiliser, append its raster data to the main dataframe\n", "fertiliser = {'Fertiliser K' : FERTILISER_K_PATH, 'Fertiliser N' : FERTILISER_N_PATH, 'Fertiliser P' : FERTILISER_P_PATH}\n", "\n", "for key in fertiliser:\n", " fert_dataset = rioxarray.open_rasterio(fertiliser[key])\n", " fert_dataset.name = key\n", " fert_df = fert_dataset.to_dataframe().drop(columns='spatial_ref')\n", " fert_df = fert_df.drop(index=2).droplevel('band')\n", " fert_df.index = fert_df.index.set_levels(fert_df.index.levels[0]-1000, level=0)\n", "\n", " main_df = main_df.join(fert_df)\n", "main_df\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Chlorothalonil_5kmGlyphosate_5kmMancozeb_5kmMecoprop-P_5kmMetamitron_5kmPendimethalin_5kmPropamocarbHydrochloride_5kmProsulfocarb_5kmSulphur_5kmTri-allate_5km
yx
1297500.02500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
7500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
12500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
17500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
22500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
.....................................................................
2500.0677500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
682500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
687500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
692500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
697500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

36400 rows × 39 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1297500.0 2500.0 0 0 0 \n", " 7500.0 0 0 0 \n", " 12500.0 0 0 0 \n", " 17500.0 0 0 0 \n", " 22500.0 0 0 0 \n", "... ... ... ... \n", "2500.0 677500.0 0 0 0 \n", " 682500.0 0 0 0 \n", " 687500.0 0 0 0 \n", " 692500.0 0 0 0 \n", " 697500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1297500.0 2500.0 0 0 \n", " 7500.0 0 0 \n", " 12500.0 0 0 \n", " 17500.0 0 0 \n", " 22500.0 0 0 \n", "... ... ... \n", "2500.0 677500.0 0 0 \n", " 682500.0 0 0 \n", " 687500.0 0 0 \n", " 692500.0 0 0 \n", " 697500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1297500.0 2500.0 0 0 0 0 \n", " 7500.0 0 0 0 0 \n", " 12500.0 0 0 0 0 \n", " 17500.0 0 0 0 0 \n", " 22500.0 0 0 0 0 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 0 0 0 \n", " 682500.0 0 0 0 0 \n", " 687500.0 0 0 0 0 \n", " 692500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", "\n", " Heather grassland ... Chlorothalonil_5km \\\n", "y x ... \n", "1297500.0 2500.0 0 ... NaN \n", " 7500.0 0 ... NaN \n", " 12500.0 0 ... NaN \n", " 17500.0 0 ... NaN \n", " 22500.0 0 ... NaN \n", "... ... ... ... \n", "2500.0 677500.0 0 ... NaN \n", " 682500.0 0 ... NaN \n", " 687500.0 0 ... NaN \n", " 692500.0 0 ... NaN \n", " 697500.0 0 ... NaN \n", "\n", " Glyphosate_5km Mancozeb_5km Mecoprop-P_5km \\\n", "y x \n", "1297500.0 2500.0 NaN NaN NaN \n", " 7500.0 NaN NaN NaN \n", " 12500.0 NaN NaN NaN \n", " 17500.0 NaN NaN NaN \n", " 22500.0 NaN NaN NaN \n", "... ... ... ... \n", "2500.0 677500.0 NaN NaN NaN \n", " 682500.0 NaN NaN NaN \n", " 687500.0 NaN NaN NaN \n", " 692500.0 NaN NaN NaN \n", " 697500.0 NaN NaN NaN \n", "\n", " Metamitron_5km Pendimethalin_5km \\\n", "y x \n", "1297500.0 2500.0 NaN NaN \n", " 7500.0 NaN NaN \n", " 12500.0 NaN NaN \n", " 17500.0 NaN NaN \n", " 22500.0 NaN NaN \n", "... ... ... \n", "2500.0 677500.0 NaN NaN \n", " 682500.0 NaN NaN \n", " 687500.0 NaN NaN \n", " 692500.0 NaN NaN \n", " 697500.0 NaN NaN \n", "\n", " PropamocarbHydrochloride_5km Prosulfocarb_5km \\\n", "y x \n", "1297500.0 2500.0 NaN NaN \n", " 7500.0 NaN NaN \n", " 12500.0 NaN NaN \n", " 17500.0 NaN NaN \n", " 22500.0 NaN NaN \n", "... ... ... \n", "2500.0 677500.0 NaN NaN \n", " 682500.0 NaN NaN \n", " 687500.0 NaN NaN \n", " 692500.0 NaN NaN \n", " 697500.0 NaN NaN \n", "\n", " Sulphur_5km Tri-allate_5km \n", "y x \n", "1297500.0 2500.0 NaN NaN \n", " 7500.0 NaN NaN \n", " 12500.0 NaN NaN \n", " 17500.0 NaN NaN \n", " 22500.0 NaN NaN \n", "... ... ... \n", "2500.0 677500.0 NaN NaN \n", " 682500.0 NaN NaN \n", " 687500.0 NaN NaN \n", " 692500.0 NaN NaN \n", " 697500.0 NaN NaN \n", "\n", "[36400 rows x 39 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# For each file in pesticide folder, append its raster data to the main dataframe\n", "pest_dict = {}\n", "for file in os.listdir(PESTICIDE_FOLDER_PATH):\n", " filename = os.fsdecode(file)\n", " if not filename.endswith('.tif'):\n", " continue\n", " \n", " pest_dataset = rioxarray.open_rasterio(PESTICIDE_FOLDER_PATH+filename)\n", " pest_name = filename[:-4]\n", " pest_dataset.name = pest_name\n", " pest_df = pest_dataset.to_dataframe().drop(columns='spatial_ref')\n", " pest_df = pest_df.drop(index=2).droplevel('band')\n", " pest_dict[pest_name] = pest_df\n", "\n", " if not main_df.index.equals(pest_df.index):\n", " y_diff = pest_df.tail(1).index.get_level_values(0).tolist()[0] - 2500\n", " x_diff = pest_df.index.get_level_values(1).tolist()[0] - 2500\n", "\n", " pest_df.index = pest_df.index.set_levels(pest_df.index.levels[0] - y_diff, level=0)\n", " pest_df.index = pest_df.index.set_levels(pest_df.index.levels[1] - x_diff, level=1)\n", "\n", "\n", " main_df = main_df.join(pest_df)\n", "main_df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Glyphosate_5kmMancozeb_5kmMecoprop-P_5kmMetamitron_5kmPendimethalin_5kmPropamocarbHydrochloride_5kmProsulfocarb_5kmSulphur_5kmTri-allate_5kmOccurrence
yx
1297500.02500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
7500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
12500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
17500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
22500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
.....................................................................
2500.0677500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
682500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
687500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
692500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
697500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
\n", "

36400 rows × 40 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1297500.0 2500.0 0 0 0 \n", " 7500.0 0 0 0 \n", " 12500.0 0 0 0 \n", " 17500.0 0 0 0 \n", " 22500.0 0 0 0 \n", "... ... ... ... \n", "2500.0 677500.0 0 0 0 \n", " 682500.0 0 0 0 \n", " 687500.0 0 0 0 \n", " 692500.0 0 0 0 \n", " 697500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1297500.0 2500.0 0 0 \n", " 7500.0 0 0 \n", " 12500.0 0 0 \n", " 17500.0 0 0 \n", " 22500.0 0 0 \n", "... ... ... \n", "2500.0 677500.0 0 0 \n", " 682500.0 0 0 \n", " 687500.0 0 0 \n", " 692500.0 0 0 \n", " 697500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1297500.0 2500.0 0 0 0 0 \n", " 7500.0 0 0 0 0 \n", " 12500.0 0 0 0 0 \n", " 17500.0 0 0 0 0 \n", " 22500.0 0 0 0 0 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 0 0 0 \n", " 682500.0 0 0 0 0 \n", " 687500.0 0 0 0 0 \n", " 692500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", "\n", " Heather grassland ... Glyphosate_5km Mancozeb_5km \\\n", "y x ... \n", "1297500.0 2500.0 0 ... NaN NaN \n", " 7500.0 0 ... NaN NaN \n", " 12500.0 0 ... NaN NaN \n", " 17500.0 0 ... NaN NaN \n", " 22500.0 0 ... NaN NaN \n", "... ... ... ... ... \n", "2500.0 677500.0 0 ... NaN NaN \n", " 682500.0 0 ... NaN NaN \n", " 687500.0 0 ... NaN NaN \n", " 692500.0 0 ... NaN NaN \n", " 697500.0 0 ... NaN NaN \n", "\n", " Mecoprop-P_5km Metamitron_5km Pendimethalin_5km \\\n", "y x \n", "1297500.0 2500.0 NaN NaN NaN \n", " 7500.0 NaN NaN NaN \n", " 12500.0 NaN NaN NaN \n", " 17500.0 NaN NaN NaN \n", " 22500.0 NaN NaN NaN \n", "... ... ... ... \n", "2500.0 677500.0 NaN NaN NaN \n", " 682500.0 NaN NaN NaN \n", " 687500.0 NaN NaN NaN \n", " 692500.0 NaN NaN NaN \n", " 697500.0 NaN NaN NaN \n", "\n", " PropamocarbHydrochloride_5km Prosulfocarb_5km \\\n", "y x \n", "1297500.0 2500.0 NaN NaN \n", " 7500.0 NaN NaN \n", " 12500.0 NaN NaN \n", " 17500.0 NaN NaN \n", " 22500.0 NaN NaN \n", "... ... ... \n", "2500.0 677500.0 NaN NaN \n", " 682500.0 NaN NaN \n", " 687500.0 NaN NaN \n", " 692500.0 NaN NaN \n", " 697500.0 NaN NaN \n", "\n", " Sulphur_5km Tri-allate_5km Occurrence \n", "y x \n", "1297500.0 2500.0 NaN NaN 0 \n", " 7500.0 NaN NaN 0 \n", " 12500.0 NaN NaN 0 \n", " 17500.0 NaN NaN 0 \n", " 22500.0 NaN NaN 0 \n", "... ... ... ... \n", "2500.0 677500.0 NaN NaN 0 \n", " 682500.0 NaN NaN 0 \n", " 687500.0 NaN NaN 0 \n", " 692500.0 NaN NaN 0 \n", " 697500.0 NaN NaN 0 \n", "\n", "[36400 rows x 40 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Label data with entire bird dataset\n", "# -> Convert every non-null value into 1s and every null value into 0\n", "\n", "bird_dataset = rioxarray.open_rasterio(INVASIVE_BIRDS_PATH)\n", "bird_dataset.name = 'data'\n", "bird_df = bird_dataset.squeeze().drop(\"spatial_ref\").drop(\"band\").to_dataframe()\n", "\n", "bird_df['Occurrence'] = [0 if x == -1 else 1 for x in bird_df['data']]\n", "main_df = main_df.join(bird_df.drop(columns='data'))\n", "main_df" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Occurrence\n", "0 28435\n", "1 7965\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Checking to see values\n", "main_df.value_counts('Occurrence')\n", "\n", "# Needs to be cleaned as there's too many rows with no data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Cleaning data\n", "# main_df = main_df.loc[main_df['Deciduous woodland']\n", "# + main_df['Coniferous woodland']\n", "# + main_df['Arable']\n", "# + main_df['Improve grassland']\n", "# + main_df['Neutral grassland']\n", "# + main_df['Calcareous grassland']\n", "# + main_df['Acid grassland']\n", "# + main_df['Fen']\n", "# + main_df['Heather']\n", "# + main_df['Heather grassland']\n", "# + main_df['Bog']\n", "# + main_df['Inland rock']\n", "# + main_df['Saltwater']\n", "# + main_df['Freshwater']\n", "# + main_df['Supralittoral rock']\n", "# + main_df['Supralittoral sediment']\n", "# + main_df['Littoral rock']\n", "# + main_df['Littoral sediment']\n", "# + main_df['Saltmarsh']\n", "# + main_df['Urban']\n", "# + main_df['Suburban'] != 0\n", "# ]\n", "# main_df.value_counts('Occurrence')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Fertiliser K', 'Fertiliser N', 'Fertiliser P', 'Chlorothalonil_5km', 'Glyphosate_5km', 'Mancozeb_5km', 'Mecoprop-P_5km', 'Metamitron_5km', 'Pendimethalin_5km', 'PropamocarbHydrochloride_5km', 'Prosulfocarb_5km', 'Sulphur_5km', 'Tri-allate_5km']\n" ] } ], "source": [ "# Show columns with null values\n", "nan_columns = []\n", "for column in main_df:\n", " if main_df[column].isnull().values.any():\n", " nan_columns.append(column)\n", "print(nan_columns)\n", "\n", "# Only the pesticides have null values" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fertiliser K Min: -3.4028230607370965e+38\n", "Fertiliser N Min: -3.4028230607370965e+38\n", "Fertiliser P Min: -3.4028230607370965e+38\n", "Chlorothalonil_5km Min: -3.3999999521443642e+38\n", "Glyphosate_5km Min: -3.3999999521443642e+38\n", "Mancozeb_5km Min: -3.3999999521443642e+38\n", "Mecoprop-P_5km Min: -3.3999999521443642e+38\n", "Metamitron_5km Min: -3.3999999521443642e+38\n", "Pendimethalin_5km Min: -3.3999999521443642e+38\n", "PropamocarbHydrochloride_5km Min: -3.3999999521443642e+38\n", "Prosulfocarb_5km Min: -3.3999999521443642e+38\n", "Sulphur_5km Min: -3.3999999521443642e+38\n", "Tri-allate_5km Min: -3.3999999521443642e+38\n" ] } ], "source": [ "# Replace null values with the minimum float value\n", "for column in main_df:\n", " if main_df[column].isnull().values.any():\n", " print(f'{column} Min: {np.nanmin(main_df[column])}')\n", " main_df[column] = main_df[column].fillna(np.nanmin(main_df[column]))\n", "\n", "# main_df = main_df.fillna(main_df.min())\n", "# old value: -3.4e+38" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Glyphosate_5kmMancozeb_5kmMecoprop-P_5kmMetamitron_5kmPendimethalin_5kmPropamocarbHydrochloride_5kmProsulfocarb_5kmSulphur_5kmTri-allate_5kmOccurrence
yx
1297500.02500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
7500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
12500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
17500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
22500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
.....................................................................
2500.0677500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
682500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
687500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
692500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
697500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
\n", "

36400 rows × 40 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1297500.0 2500.0 0 0 0 \n", " 7500.0 0 0 0 \n", " 12500.0 0 0 0 \n", " 17500.0 0 0 0 \n", " 22500.0 0 0 0 \n", "... ... ... ... \n", "2500.0 677500.0 0 0 0 \n", " 682500.0 0 0 0 \n", " 687500.0 0 0 0 \n", " 692500.0 0 0 0 \n", " 697500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1297500.0 2500.0 0 0 \n", " 7500.0 0 0 \n", " 12500.0 0 0 \n", " 17500.0 0 0 \n", " 22500.0 0 0 \n", "... ... ... \n", "2500.0 677500.0 0 0 \n", " 682500.0 0 0 \n", " 687500.0 0 0 \n", " 692500.0 0 0 \n", " 697500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1297500.0 2500.0 0 0 0 0 \n", " 7500.0 0 0 0 0 \n", " 12500.0 0 0 0 0 \n", " 17500.0 0 0 0 0 \n", " 22500.0 0 0 0 0 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 0 0 0 \n", " 682500.0 0 0 0 0 \n", " 687500.0 0 0 0 0 \n", " 692500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", "\n", " Heather grassland ... Glyphosate_5km Mancozeb_5km \\\n", "y x ... \n", "1297500.0 2500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 7500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 12500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 17500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 22500.0 0 ... -3.400000e+38 -3.400000e+38 \n", "... ... ... ... ... \n", "2500.0 677500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 682500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 687500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 692500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 697500.0 0 ... -3.400000e+38 -3.400000e+38 \n", "\n", " Mecoprop-P_5km Metamitron_5km Pendimethalin_5km \\\n", "y x \n", "1297500.0 2500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 7500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 12500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 17500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 22500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", "... ... ... ... \n", "2500.0 677500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 682500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 687500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 692500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 697500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", "\n", " PropamocarbHydrochloride_5km Prosulfocarb_5km \\\n", "y x \n", "1297500.0 2500.0 -3.400000e+38 -3.400000e+38 \n", " 7500.0 -3.400000e+38 -3.400000e+38 \n", " 12500.0 -3.400000e+38 -3.400000e+38 \n", " 17500.0 -3.400000e+38 -3.400000e+38 \n", " 22500.0 -3.400000e+38 -3.400000e+38 \n", "... ... ... \n", "2500.0 677500.0 -3.400000e+38 -3.400000e+38 \n", " 682500.0 -3.400000e+38 -3.400000e+38 \n", " 687500.0 -3.400000e+38 -3.400000e+38 \n", " 692500.0 -3.400000e+38 -3.400000e+38 \n", " 697500.0 -3.400000e+38 -3.400000e+38 \n", "\n", " Sulphur_5km Tri-allate_5km Occurrence \n", "y x \n", "1297500.0 2500.0 -3.400000e+38 -3.400000e+38 0 \n", " 7500.0 -3.400000e+38 -3.400000e+38 0 \n", " 12500.0 -3.400000e+38 -3.400000e+38 0 \n", " 17500.0 -3.400000e+38 -3.400000e+38 0 \n", " 22500.0 -3.400000e+38 -3.400000e+38 0 \n", "... ... ... ... \n", "2500.0 677500.0 -3.400000e+38 -3.400000e+38 0 \n", " 682500.0 -3.400000e+38 -3.400000e+38 0 \n", " 687500.0 -3.400000e+38 -3.400000e+38 0 \n", " 692500.0 -3.400000e+38 -3.400000e+38 0 \n", " 697500.0 -3.400000e+38 -3.400000e+38 0 \n", "\n", "[36400 rows x 40 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "main_df" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Output dataframe as csv\n", "main_df.to_csv('Datasets/Machine Learning/Dataframes/5km_All_Birds_DF.csv')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.13 ('env': venv)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "f025c48a9b67ab76bdc0400dfa0f9ba99120976b4a6ec6a63d1c946516165c91" } } }, "nbformat": 4, "nbformat_minor": 2 }