import rioxarray
import os

# Configuration cell: relative paths to every 1 km raster used to build the
# feature table. All paths are relative to the notebook's working directory.

# <<< INVASIVE SPECIES MAP >>>
# Raster of invasive bird records; read when the 'Occurrence' label is built.
INVASIVE_BIRDS_PATH = 'Datasets/Machine Learning/1km Rasters/Birds/All_Invasive_Birds_1km.tif'

# <<< LAND COVER MAP >>>
# Extent: 700000 x 1300000 coordinate units (x by y) -- presumably metres, TODO confirm.
# At 1 km resolution this is a 700 x 1300 grid (910000 cells), not 700000 x 1300000 pixels.
LAND_COVER_MAP_PATH = 'Datasets/Machine Learning/1km Rasters/Features/gb2021lcm1km_percentage_target.tif'

# <<< FERTILISER >>>
# The dataset consists of maps of the predicted average annual application rates (2010-2015) of three different inorganic
# chemical fertilisers – nitrogen (N), phosphorus (P) and potassium (K) – in England across a six-year period, along with
# their respective estimates of uncertainty, at a 1 km x 1 km resolution.
# NOTE(review): the file names suggest each raster holds a prediction band and an
# uncertainty band -- confirm the band order before relying on it.
FERTILISER_K_PATH = 'Datasets/Machine Learning/1km Rasters/Features/fertiliser_k_prediction_uncertainty.tif'
FERTILISER_N_PATH = 'Datasets/Machine Learning/1km Rasters/Features/fertiliser_n_prediction_uncertainty.tif'
FERTILISER_P_PATH = 'Datasets/Machine Learning/1km Rasters/Features/fertiliser_p_prediction_uncertainty.tif'

# <<< PESTICIDE >>>
# Folder of per-pesticide rasters. The trailing slash matters: file names are
# appended to this string directly when the folder is scanned.
PESTICIDE_FOLDER_PATH = 'Datasets/Machine Learning/1km Rasters/Features/Pesticides/'

# <<< INTEGRATED HYDROLOGICAL DIGITAL TERRAIN MODEL >>>
# Extent: 700000 x 1300000 coordinate units (same caveat as the land cover map).
# These datasets all only have one band
ELEVATION_PATH = 'Datasets/Machine Learning/1km Rasters/Features/HGHT_1km.tif'
CUMULATIVE_CATCHMENT_AREA_PATH = 'Datasets/Machine Learning/1km Rasters/Features/CCAR_1km.tif'
SURFACE_TYPE_PATH = 'Datasets/Machine Learning/1km Rasters/Features/SURF_1km.tif'
OUTFLOWING_DRAINAGE_DIRECTION_PATH = 'Datasets/Machine Learning/1km Rasters/Features/OUTF_1km.tif'
INFLOWING_DRAINAGE_PATTERN_PATH = 'Datasets/Machine Learning/1km Rasters/Features/INFL_1km.tif'

# Maps the feature column name to the raster it is read from. The keys are
# used verbatim as column names when the rasters are joined to the main frame.
# NOTE(review): the last key says "direction" while its constant says "PATTERN";
# the key is a runtime column name, so it is left unchanged here.
IHDTM = {
    'Elevation': ELEVATION_PATH,
    'Cumulative catchment area': CUMULATIVE_CATCHMENT_AREA_PATH,
    'Surface type': SURFACE_TYPE_PATH,
    'Outflowing drainage direction': OUTFLOWING_DRAINAGE_DIRECTION_PATH,
    'Inflowing drainage direction': INFLOWING_DRAINAGE_PATTERN_PATH
}


# Scratch examples kept for reference. They are not runnable as-is: this cell
# imports neither `rasterio` nor `numpy` (`np`).

# <<< Opening in rasterio >>>
# dataset = rasterio.open(FERTILISER_P_PATH)
# data = dataset.read()
# np.max(data)

# <<< Opening in rioxarray >>>
# dataset = rioxarray.open_rasterio(ELEVATION_PATH)
# dataset.name = 'data'
# df = dataset.to_dataframe()
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bandDeciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Inland rockSaltwaterFreshwaterSupralittoral rockSupralittoral sedimentLittoral rockLittoral sedimentSaltmarshUrbanSuburban
yx
1299500.0500.00000000000...0000000000
1500.00000000000...0000000000
2500.00000000000...0000000000
3500.00000000000...0000000000
4500.00000000000...0000000000
.....................................................................
500.0695500.00000000000...0000000000
696500.00000000000...0000000000
697500.00000000000...0000000000
698500.00000000000...0000000000
699500.00000000000...0000000000
\n", "

910000 rows × 21 columns

\n", "
" ], "text/plain": [ "band Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1299500.0 500.0 0 0 0 \n", " 1500.0 0 0 0 \n", " 2500.0 0 0 0 \n", " 3500.0 0 0 0 \n", " 4500.0 0 0 0 \n", "... ... ... ... \n", "500.0 695500.0 0 0 0 \n", " 696500.0 0 0 0 \n", " 697500.0 0 0 0 \n", " 698500.0 0 0 0 \n", " 699500.0 0 0 0 \n", "\n", "band Improve grassland Neutral grassland \\\n", "y x \n", "1299500.0 500.0 0 0 \n", " 1500.0 0 0 \n", " 2500.0 0 0 \n", " 3500.0 0 0 \n", " 4500.0 0 0 \n", "... ... ... \n", "500.0 695500.0 0 0 \n", " 696500.0 0 0 \n", " 697500.0 0 0 \n", " 698500.0 0 0 \n", " 699500.0 0 0 \n", "\n", "band Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1299500.0 500.0 0 0 0 0 \n", " 1500.0 0 0 0 0 \n", " 2500.0 0 0 0 0 \n", " 3500.0 0 0 0 0 \n", " 4500.0 0 0 0 0 \n", "... ... ... ... ... \n", "500.0 695500.0 0 0 0 0 \n", " 696500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", " 698500.0 0 0 0 0 \n", " 699500.0 0 0 0 0 \n", "\n", "band Heather grassland ... Inland rock Saltwater \\\n", "y x ... \n", "1299500.0 500.0 0 ... 0 0 \n", " 1500.0 0 ... 0 0 \n", " 2500.0 0 ... 0 0 \n", " 3500.0 0 ... 0 0 \n", " 4500.0 0 ... 0 0 \n", "... ... ... ... ... \n", "500.0 695500.0 0 ... 0 0 \n", " 696500.0 0 ... 0 0 \n", " 697500.0 0 ... 0 0 \n", " 698500.0 0 ... 0 0 \n", " 699500.0 0 ... 0 0 \n", "\n", "band Freshwater Supralittoral rock Supralittoral sediment \\\n", "y x \n", "1299500.0 500.0 0 0 0 \n", " 1500.0 0 0 0 \n", " 2500.0 0 0 0 \n", " 3500.0 0 0 0 \n", " 4500.0 0 0 0 \n", "... ... ... ... \n", "500.0 695500.0 0 0 0 \n", " 696500.0 0 0 0 \n", " 697500.0 0 0 0 \n", " 698500.0 0 0 0 \n", " 699500.0 0 0 0 \n", "\n", "band Littoral rock Littoral sediment Saltmarsh Urban \\\n", "y x \n", "1299500.0 500.0 0 0 0 0 \n", " 1500.0 0 0 0 0 \n", " 2500.0 0 0 0 0 \n", " 3500.0 0 0 0 0 \n", " 4500.0 0 0 0 0 \n", "... ... ... ... ... 
# Column labels for the land cover bands, listed in band order: the first
# entry names band 1, the second band 2, and so on (21 classes in total).
LCM_CLASSES = [
    'Deciduous woodland',
    'Coniferous woodland',
    'Arable',
    'Improve grassland',
    'Neutral grassland',
    'Calcareous grassland',
    'Acid grassland',
    'Fen',
    'Heather',
    'Heather grassland',
    'Bog',
    'Inland rock',
    'Saltwater',
    'Freshwater',
    'Supralittoral rock',
    'Supralittoral sediment',
    'Littoral rock',
    'Littoral sediment',
    'Saltmarsh',
    'Urban',
    'Suburban'
]

# The land cover map is the foundation of the main dataframe: it covers the
# widest extent of all the rasters and is arguably one of the most important
# feature sets. Each band is pivoted into its own feature column, leaving one
# row per (y, x) grid cell.
lcm = rioxarray.open_rasterio(LAND_COVER_MAP_PATH)
lcm.name = 'data'

# Band numbers start at 1, hence start=1 when pairing them with class names.
band_to_class = dict(enumerate(LCM_CLASSES, start=1))

main_df = (
    lcm.to_dataframe()
    .drop(columns='spatial_ref')
    .unstack(level='band')['data']
    .rename(columns=band_to_class)
)
main_df
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...Littoral rockLittoral sedimentSaltmarshUrbanSuburbanElevationCumulative catchment areaSurface typeOutflowing drainage directionInflowing drainage direction
yx
1299500.0500.00000000000...00000-9999-9999-1-1255
1500.00000000000...00000-9999-9999-1-1255
2500.00000000000...00000-9999-9999-1-1255
3500.00000000000...00000-9999-9999-1-1255
4500.00000000000...00000-9999-9999-1-1255
.....................................................................
500.0695500.00000000000...00000-9999-9999-1-1255
696500.00000000000...00000-9999-9999-1-1255
697500.00000000000...00000-9999-9999-1-1255
698500.00000000000...00000-9999-9999-1-1255
699500.00000000000...00000-9999-9999-1-1255
\n", "

910000 rows × 26 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1299500.0 500.0 0 0 0 \n", " 1500.0 0 0 0 \n", " 2500.0 0 0 0 \n", " 3500.0 0 0 0 \n", " 4500.0 0 0 0 \n", "... ... ... ... \n", "500.0 695500.0 0 0 0 \n", " 696500.0 0 0 0 \n", " 697500.0 0 0 0 \n", " 698500.0 0 0 0 \n", " 699500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1299500.0 500.0 0 0 \n", " 1500.0 0 0 \n", " 2500.0 0 0 \n", " 3500.0 0 0 \n", " 4500.0 0 0 \n", "... ... ... \n", "500.0 695500.0 0 0 \n", " 696500.0 0 0 \n", " 697500.0 0 0 \n", " 698500.0 0 0 \n", " 699500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1299500.0 500.0 0 0 0 0 \n", " 1500.0 0 0 0 0 \n", " 2500.0 0 0 0 0 \n", " 3500.0 0 0 0 0 \n", " 4500.0 0 0 0 0 \n", "... ... ... ... ... \n", "500.0 695500.0 0 0 0 0 \n", " 696500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", " 698500.0 0 0 0 0 \n", " 699500.0 0 0 0 0 \n", "\n", " Heather grassland ... Littoral rock Littoral sediment \\\n", "y x ... \n", "1299500.0 500.0 0 ... 0 0 \n", " 1500.0 0 ... 0 0 \n", " 2500.0 0 ... 0 0 \n", " 3500.0 0 ... 0 0 \n", " 4500.0 0 ... 0 0 \n", "... ... ... ... ... \n", "500.0 695500.0 0 ... 0 0 \n", " 696500.0 0 ... 0 0 \n", " 697500.0 0 ... 0 0 \n", " 698500.0 0 ... 0 0 \n", " 699500.0 0 ... 0 0 \n", "\n", " Saltmarsh Urban Suburban Elevation \\\n", "y x \n", "1299500.0 500.0 0 0 0 -9999 \n", " 1500.0 0 0 0 -9999 \n", " 2500.0 0 0 0 -9999 \n", " 3500.0 0 0 0 -9999 \n", " 4500.0 0 0 0 -9999 \n", "... ... ... ... ... \n", "500.0 695500.0 0 0 0 -9999 \n", " 696500.0 0 0 0 -9999 \n", " 697500.0 0 0 0 -9999 \n", " 698500.0 0 0 0 -9999 \n", " 699500.0 0 0 0 -9999 \n", "\n", " Cumulative catchment area Surface type \\\n", "y x \n", "1299500.0 500.0 -9999 -1 \n", " 1500.0 -9999 -1 \n", " 2500.0 -9999 -1 \n", " 3500.0 -9999 -1 \n", " 4500.0 -9999 -1 \n", "... ... ... 
# Join every single-band IHDTM raster onto the main dataframe as one feature
# column per raster; the column name is the raster's key in IHDTM.

# Shift applied to the IHDTM x and y coordinates so that they match the index
# of the other datasets (the IHDTM grid is offset by this amount).
IHDTM_COORD_OFFSET = 25

for column_name, raster_path in IHDTM.items():
    ihdtm_data = rioxarray.open_rasterio(raster_path)
    # squeeze() removes the length-1 band dimension; drop_vars() then removes
    # the leftover scalar coordinates so that only y/x index the frame.
    # (`DataArray.drop` is deprecated in favour of `drop_vars`.)
    ihdtm_data = ihdtm_data.squeeze().drop_vars(["spatial_ref", "band"])
    ihdtm_data.name = column_name
    ihdtm_df = ihdtm_data.to_dataframe()

    # Shift both index levels (y, x) in one call.
    ihdtm_df.index = ihdtm_df.index.set_levels(
        [level + IHDTM_COORD_OFFSET for level in ihdtm_df.index.levels],
        level=[0, 1],
    )

    # Drop any copy of this column left by an earlier run of the cell, so that
    # re-running replaces it instead of raising "columns overlap".
    main_df = main_df.drop(columns=column_name, errors="ignore").join(ihdtm_df)

# NOTE(review): the joined columns contain values such as -9999, -1 and 255 on
# cells with no land cover; these look like nodata sentinels and are joined
# unchanged -- confirm they are filtered out before modelling.
main_df
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...UrbanSuburbanElevationCumulative catchment areaSurface typeOutflowing drainage directionInflowing drainage directionFertiliser KFertiliser NFertiliser P
yx
1299500.0500.00000000000...00-9999-9999-1-1255NaNNaNNaN
1500.00000000000...00-9999-9999-1-1255NaNNaNNaN
2500.00000000000...00-9999-9999-1-1255NaNNaNNaN
3500.00000000000...00-9999-9999-1-1255NaNNaNNaN
4500.00000000000...00-9999-9999-1-1255NaNNaNNaN
.....................................................................
500.0695500.00000000000...00-9999-9999-1-1255NaNNaNNaN
696500.00000000000...00-9999-9999-1-1255NaNNaNNaN
697500.00000000000...00-9999-9999-1-1255NaNNaNNaN
698500.00000000000...00-9999-9999-1-1255NaNNaNNaN
699500.00000000000...00-9999-9999-1-1255NaNNaNNaN
\n", "

910000 rows × 29 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1299500.0 500.0 0 0 0 \n", " 1500.0 0 0 0 \n", " 2500.0 0 0 0 \n", " 3500.0 0 0 0 \n", " 4500.0 0 0 0 \n", "... ... ... ... \n", "500.0 695500.0 0 0 0 \n", " 696500.0 0 0 0 \n", " 697500.0 0 0 0 \n", " 698500.0 0 0 0 \n", " 699500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1299500.0 500.0 0 0 \n", " 1500.0 0 0 \n", " 2500.0 0 0 \n", " 3500.0 0 0 \n", " 4500.0 0 0 \n", "... ... ... \n", "500.0 695500.0 0 0 \n", " 696500.0 0 0 \n", " 697500.0 0 0 \n", " 698500.0 0 0 \n", " 699500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1299500.0 500.0 0 0 0 0 \n", " 1500.0 0 0 0 0 \n", " 2500.0 0 0 0 0 \n", " 3500.0 0 0 0 0 \n", " 4500.0 0 0 0 0 \n", "... ... ... ... ... \n", "500.0 695500.0 0 0 0 0 \n", " 696500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", " 698500.0 0 0 0 0 \n", " 699500.0 0 0 0 0 \n", "\n", " Heather grassland ... Urban Suburban Elevation \\\n", "y x ... \n", "1299500.0 500.0 0 ... 0 0 -9999 \n", " 1500.0 0 ... 0 0 -9999 \n", " 2500.0 0 ... 0 0 -9999 \n", " 3500.0 0 ... 0 0 -9999 \n", " 4500.0 0 ... 0 0 -9999 \n", "... ... ... ... ... ... \n", "500.0 695500.0 0 ... 0 0 -9999 \n", " 696500.0 0 ... 0 0 -9999 \n", " 697500.0 0 ... 0 0 -9999 \n", " 698500.0 0 ... 0 0 -9999 \n", " 699500.0 0 ... 0 0 -9999 \n", "\n", " Cumulative catchment area Surface type \\\n", "y x \n", "1299500.0 500.0 -9999 -1 \n", " 1500.0 -9999 -1 \n", " 2500.0 -9999 -1 \n", " 3500.0 -9999 -1 \n", " 4500.0 -9999 -1 \n", "... ... ... \n", "500.0 695500.0 -9999 -1 \n", " 696500.0 -9999 -1 \n", " 697500.0 -9999 -1 \n", " 698500.0 -9999 -1 \n", " 699500.0 -9999 -1 \n", "\n", " Outflowing drainage direction \\\n", "y x \n", "1299500.0 500.0 -1 \n", " 1500.0 -1 \n", " 2500.0 -1 \n", " 3500.0 -1 \n", " 4500.0 -1 \n", "... ... 
# Join one feature column per fertiliser raster onto the main dataframe.
# Keys are the resulting column names; values are the raster paths.
fertiliser = {'Fertiliser K' : FERTILISER_K_PATH, 'Fertiliser N' : FERTILISER_N_PATH, 'Fertiliser P' : FERTILISER_P_PATH}

for column_name, raster_path in fertiliser.items():
    fert_dataset = rioxarray.open_rasterio(raster_path)
    fert_dataset.name = column_name

    # Keep band 1 only, selecting it before the conversion so the other band
    # is never expanded into the dataframe. For a two-band raster this equals
    # dropping band 2 after the conversion.
    # NOTE(review): presumably band 1 is the prediction and band 2 the
    # uncertainty -- confirm against the dataset documentation.
    fert_df = (
        fert_dataset.sel(band=1, drop=True)
        .to_dataframe()
        .drop(columns='spatial_ref')
    )

    # Drop any copy of this column left by an earlier run of the cell, so that
    # re-running replaces it instead of raising "columns overlap".
    main_df = main_df.drop(columns=column_name, errors='ignore').join(fert_df)
main_df
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...ChlorothalonilGlyphosateMancozebMecoprop-PMetamitronPendimethalinPropamocarbHydrochlorideProsulfocarbSulphurTri-allate
yx
1299500.0500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
.....................................................................
500.0695500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
696500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
697500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
698500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
699500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

910000 rows × 39 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1299500.0 500.0 0 0 0 \n", " 1500.0 0 0 0 \n", " 2500.0 0 0 0 \n", " 3500.0 0 0 0 \n", " 4500.0 0 0 0 \n", "... ... ... ... \n", "500.0 695500.0 0 0 0 \n", " 696500.0 0 0 0 \n", " 697500.0 0 0 0 \n", " 698500.0 0 0 0 \n", " 699500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1299500.0 500.0 0 0 \n", " 1500.0 0 0 \n", " 2500.0 0 0 \n", " 3500.0 0 0 \n", " 4500.0 0 0 \n", "... ... ... \n", "500.0 695500.0 0 0 \n", " 696500.0 0 0 \n", " 697500.0 0 0 \n", " 698500.0 0 0 \n", " 699500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1299500.0 500.0 0 0 0 0 \n", " 1500.0 0 0 0 0 \n", " 2500.0 0 0 0 0 \n", " 3500.0 0 0 0 0 \n", " 4500.0 0 0 0 0 \n", "... ... ... ... ... \n", "500.0 695500.0 0 0 0 0 \n", " 696500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", " 698500.0 0 0 0 0 \n", " 699500.0 0 0 0 0 \n", "\n", " Heather grassland ... Chlorothalonil Glyphosate \\\n", "y x ... \n", "1299500.0 500.0 0 ... NaN NaN \n", " 1500.0 0 ... NaN NaN \n", " 2500.0 0 ... NaN NaN \n", " 3500.0 0 ... NaN NaN \n", " 4500.0 0 ... NaN NaN \n", "... ... ... ... ... \n", "500.0 695500.0 0 ... NaN NaN \n", " 696500.0 0 ... NaN NaN \n", " 697500.0 0 ... NaN NaN \n", " 698500.0 0 ... NaN NaN \n", " 699500.0 0 ... NaN NaN \n", "\n", " Mancozeb Mecoprop-P Metamitron Pendimethalin \\\n", "y x \n", "1299500.0 500.0 NaN NaN NaN NaN \n", " 1500.0 NaN NaN NaN NaN \n", " 2500.0 NaN NaN NaN NaN \n", " 3500.0 NaN NaN NaN NaN \n", " 4500.0 NaN NaN NaN NaN \n", "... ... ... ... ... \n", "500.0 695500.0 NaN NaN NaN NaN \n", " 696500.0 NaN NaN NaN NaN \n", " 697500.0 NaN NaN NaN NaN \n", " 698500.0 NaN NaN NaN NaN \n", " 699500.0 NaN NaN NaN NaN \n", "\n", " PropamocarbHydrochloride Prosulfocarb Sulphur \\\n", "y x \n", "1299500.0 500.0 NaN NaN NaN \n", " 1500.0 NaN NaN NaN \n", " 2500.0 NaN NaN NaN \n", " 3500.0 NaN NaN NaN \n", " 4500.0 NaN NaN NaN \n", "... ... 
# Join one feature column per pesticide raster found in the pesticide folder.
# The column name is the file name without its '.tif' extension.

# os.listdir() returns entries in arbitrary order; sorting keeps the column
# order identical across machines and re-runs.
for filename in sorted(os.listdir(PESTICIDE_FOLDER_PATH)):
    if not filename.endswith('.tif'):
        continue

    pest_dataset = rioxarray.open_rasterio(os.path.join(PESTICIDE_FOLDER_PATH, filename))
    pest_dataset.name = os.path.splitext(filename)[0]

    # Keep band 1 only, selecting it before the conversion so the other band
    # is never expanded into the dataframe. For a two-band raster this equals
    # dropping band 2 after the conversion.
    # NOTE(review): presumably band 2 is an uncertainty estimate -- confirm.
    pest_df = (
        pest_dataset.sel(band=1, drop=True)
        .to_dataframe()
        .drop(columns='spatial_ref')
    )

    # Drop any copy of this column left by an earlier run of the cell, so that
    # re-running replaces it instead of raising "columns overlap".
    main_df = main_df.drop(columns=pest_dataset.name, errors='ignore').join(pest_df)
main_df
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...GlyphosateMancozebMecoprop-PMetamitronPendimethalinPropamocarbHydrochlorideProsulfocarbSulphurTri-allateOccurrence
yx
1299500.0500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
1500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
2500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
3500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
4500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
.....................................................................
500.0695500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
696500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
697500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
698500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
699500.00000000000...NaNNaNNaNNaNNaNNaNNaNNaNNaN0
\n", "

910000 rows × 40 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1299500.0 500.0 0 0 0 \n", " 1500.0 0 0 0 \n", " 2500.0 0 0 0 \n", " 3500.0 0 0 0 \n", " 4500.0 0 0 0 \n", "... ... ... ... \n", "500.0 695500.0 0 0 0 \n", " 696500.0 0 0 0 \n", " 697500.0 0 0 0 \n", " 698500.0 0 0 0 \n", " 699500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1299500.0 500.0 0 0 \n", " 1500.0 0 0 \n", " 2500.0 0 0 \n", " 3500.0 0 0 \n", " 4500.0 0 0 \n", "... ... ... \n", "500.0 695500.0 0 0 \n", " 696500.0 0 0 \n", " 697500.0 0 0 \n", " 698500.0 0 0 \n", " 699500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1299500.0 500.0 0 0 0 0 \n", " 1500.0 0 0 0 0 \n", " 2500.0 0 0 0 0 \n", " 3500.0 0 0 0 0 \n", " 4500.0 0 0 0 0 \n", "... ... ... ... ... \n", "500.0 695500.0 0 0 0 0 \n", " 696500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", " 698500.0 0 0 0 0 \n", " 699500.0 0 0 0 0 \n", "\n", " Heather grassland ... Glyphosate Mancozeb Mecoprop-P \\\n", "y x ... \n", "1299500.0 500.0 0 ... NaN NaN NaN \n", " 1500.0 0 ... NaN NaN NaN \n", " 2500.0 0 ... NaN NaN NaN \n", " 3500.0 0 ... NaN NaN NaN \n", " 4500.0 0 ... NaN NaN NaN \n", "... ... ... ... ... ... \n", "500.0 695500.0 0 ... NaN NaN NaN \n", " 696500.0 0 ... NaN NaN NaN \n", " 697500.0 0 ... NaN NaN NaN \n", " 698500.0 0 ... NaN NaN NaN \n", " 699500.0 0 ... NaN NaN NaN \n", "\n", " Metamitron Pendimethalin PropamocarbHydrochloride \\\n", "y x \n", "1299500.0 500.0 NaN NaN NaN \n", " 1500.0 NaN NaN NaN \n", " 2500.0 NaN NaN NaN \n", " 3500.0 NaN NaN NaN \n", " 4500.0 NaN NaN NaN \n", "... ... ... ... 
\n", "500.0 695500.0 NaN NaN NaN \n", " 696500.0 NaN NaN NaN \n", " 697500.0 NaN NaN NaN \n", " 698500.0 NaN NaN NaN \n", " 699500.0 NaN NaN NaN \n", "\n", " Prosulfocarb Sulphur Tri-allate Occurrence \n", "y x \n", "1299500.0 500.0 NaN NaN NaN 0 \n", " 1500.0 NaN NaN NaN 0 \n", " 2500.0 NaN NaN NaN 0 \n", " 3500.0 NaN NaN NaN 0 \n", " 4500.0 NaN NaN NaN 0 \n", "... ... ... ... ... \n", "500.0 695500.0 NaN NaN NaN 0 \n", " 696500.0 NaN NaN NaN 0 \n", " 697500.0 NaN NaN NaN 0 \n", " 698500.0 NaN NaN NaN 0 \n", " 699500.0 NaN NaN NaN 0 \n", "\n", "[910000 rows x 40 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Label data with entire bird dataset\n", "# -> Cells equal to -1 become 0 and every other cell becomes 1\n", "# NOTE(review): -1 is assumed to be the raster's no-data marker - confirm against the GeoTIFF metadata\n", "\n", "bird_dataset = rioxarray.open_rasterio(INVASIVE_BIRDS_PATH)\n", "bird_dataset.name = 'data'\n", "# drop_vars replaces the deprecated DataArray.drop; these coordinates would otherwise become extra columns\n", "bird_df = bird_dataset.squeeze().drop_vars(['spatial_ref', 'band']).to_dataframe()\n", "\n", "# Vectorised comparison instead of a Python-level loop over every raster cell\n", "bird_df['Occurrence'] = (bird_df['data'] != -1).astype('int64')\n", "\n", "# Drop any earlier 'Occurrence' column so re-running this cell does not fail on overlapping columns\n", "main_df = main_df.drop(columns='Occurrence', errors='ignore').join(bird_df.drop(columns='data'))\n", "main_df" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Occurrence\n", "0 876916\n", "1 33084\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Check how many cells fall into each Occurrence class\n", "main_df.value_counts('Occurrence')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Cleaning data\n", "# main_df = main_df.loc[main_df['Deciduous woodland']\n", "# + main_df['Coniferous woodland']\n", "# + main_df['Arable']\n", "# + main_df['Improve grassland']\n", "# + main_df['Neutral grassland']\n", "# + main_df['Calcareous grassland']\n", "# + main_df['Acid grassland']\n", "# + main_df['Fen']\n", "# + main_df['Heather']\n", "# + main_df['Heather grassland']\n", "# + main_df['Bog']\n", "# + main_df['Inland rock']\n", "# + 
main_df['Saltwater']\n", "# + main_df['Freshwater']\n", "# + main_df['Supralittoral rock']\n", "# + main_df['Supralittoral sediment']\n", "# + main_df['Littoral rock']\n", "# + main_df['Littoral sediment']\n", "# + main_df['Saltmarsh']\n", "# + main_df['Urban']\n", "# + main_df['Suburban'] != 0\n", "# ]\n", "# main_df.value_counts('Occurrence')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Fertiliser K', 'Fertiliser N', 'Fertiliser P', 'Chlorothalonil', 'Glyphosate', 'Mancozeb', 'Mecoprop-P', 'Metamitron', 'Pendimethalin', 'PropamocarbHydrochloride', 'Prosulfocarb', 'Sulphur', 'Tri-allate']\n" ] } ], "source": [ "# Show columns with null values\n", "# A boolean mask over the columns keeps them in their original order\n", "nan_columns = main_df.columns[main_df.isnull().any()].tolist()\n", "print(nan_columns)\n", "\n", "# Only the fertiliser and pesticide columns have null values" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Replace null values with the minimum of their own column\n", "# (passing a Series to fillna fills each column with the value stored under that column's name)\n", "main_df = main_df.fillna(main_df.min())\n", "# old value: -3.4e+38" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deciduous woodlandConiferous woodlandArableImprove grasslandNeutral grasslandCalcareous grasslandAcid grasslandFenHeatherHeather grassland...GlyphosateMancozebMecoprop-PMetamitronPendimethalinPropamocarbHydrochlorideProsulfocarbSulphurTri-allateOccurrence
yx
1299500.0500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
1500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
2500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
3500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
4500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
.....................................................................
500.0695500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
696500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
697500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
698500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
699500.00000000000...-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+38-3.400000e+380
\n", "

910000 rows × 40 columns

\n", "
" ], "text/plain": [ " Deciduous woodland Coniferous woodland Arable \\\n", "y x \n", "1299500.0 500.0 0 0 0 \n", " 1500.0 0 0 0 \n", " 2500.0 0 0 0 \n", " 3500.0 0 0 0 \n", " 4500.0 0 0 0 \n", "... ... ... ... \n", "500.0 695500.0 0 0 0 \n", " 696500.0 0 0 0 \n", " 697500.0 0 0 0 \n", " 698500.0 0 0 0 \n", " 699500.0 0 0 0 \n", "\n", " Improve grassland Neutral grassland \\\n", "y x \n", "1299500.0 500.0 0 0 \n", " 1500.0 0 0 \n", " 2500.0 0 0 \n", " 3500.0 0 0 \n", " 4500.0 0 0 \n", "... ... ... \n", "500.0 695500.0 0 0 \n", " 696500.0 0 0 \n", " 697500.0 0 0 \n", " 698500.0 0 0 \n", " 699500.0 0 0 \n", "\n", " Calcareous grassland Acid grassland Fen Heather \\\n", "y x \n", "1299500.0 500.0 0 0 0 0 \n", " 1500.0 0 0 0 0 \n", " 2500.0 0 0 0 0 \n", " 3500.0 0 0 0 0 \n", " 4500.0 0 0 0 0 \n", "... ... ... ... ... \n", "500.0 695500.0 0 0 0 0 \n", " 696500.0 0 0 0 0 \n", " 697500.0 0 0 0 0 \n", " 698500.0 0 0 0 0 \n", " 699500.0 0 0 0 0 \n", "\n", " Heather grassland ... Glyphosate Mancozeb \\\n", "y x ... \n", "1299500.0 500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 1500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 2500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 3500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 4500.0 0 ... -3.400000e+38 -3.400000e+38 \n", "... ... ... ... ... \n", "500.0 695500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 696500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 697500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 698500.0 0 ... -3.400000e+38 -3.400000e+38 \n", " 699500.0 0 ... -3.400000e+38 -3.400000e+38 \n", "\n", " Mecoprop-P Metamitron Pendimethalin \\\n", "y x \n", "1299500.0 500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 1500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 2500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 3500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 4500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", "... ... ... ... 
\n", "500.0 695500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 696500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 697500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 698500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 699500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", "\n", " PropamocarbHydrochloride Prosulfocarb Sulphur \\\n", "y x \n", "1299500.0 500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 1500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 2500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 3500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 4500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", "... ... ... ... \n", "500.0 695500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 696500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 697500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 698500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", " 699500.0 -3.400000e+38 -3.400000e+38 -3.400000e+38 \n", "\n", " Tri-allate Occurrence \n", "y x \n", "1299500.0 500.0 -3.400000e+38 0 \n", " 1500.0 -3.400000e+38 0 \n", " 2500.0 -3.400000e+38 0 \n", " 3500.0 -3.400000e+38 0 \n", " 4500.0 -3.400000e+38 0 \n", "... ... ... 
\n", "500.0 695500.0 -3.400000e+38 0 \n", " 696500.0 -3.400000e+38 0 \n", " 697500.0 -3.400000e+38 0 \n", " 698500.0 -3.400000e+38 0 \n", " 699500.0 -3.400000e+38 0 \n", "\n", "[910000 rows x 40 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inspect the frame after the null values have been filled\n", "main_df" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Output dataframe as csv\n", "OUTPUT_CSV_PATH = 'Datasets/Machine Learning/Dataframes/1km_All_Birds_DF.csv'\n", "# to_csv does not create missing parent folders, so make sure the target folder exists first\n", "os.makedirs(os.path.dirname(OUTPUT_CSV_PATH), exist_ok=True)\n", "main_df.to_csv(OUTPUT_CSV_PATH)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.13 ('env': venv)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "f025c48a9b67ab76bdc0400dfa0f9ba99120976b4a6ec6a63d1c946516165c91" } } }, "nbformat": 4, "nbformat_minor": 2 }