Biodiversity Hotspots Exploration

Alexander Dunkel, Institute of Cartography, TU Dresden

Out[1]:

Last updated: Aug-19-2023, Carto-Lab Docker Version 0.14.0

Visualization of temporal patterns of geo-social media posts for Biodiversity Hotspots in Germany.

Preparations

In [2]:
import os, sys
from pathlib import Path
import psycopg2
import geopandas as gp
import pandas as pd
import seaborn as sns
import calendar
import textwrap
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import matplotlib.patheffects as pe
from typing import List, Tuple, Dict, Optional, Any
from IPython.display import clear_output, display, HTML
from datetime import datetime
In [61]:
module_path = str(Path.cwd().parents[0] / "py")
if module_path not in sys.path:
    sys.path.append(module_path)
from modules.base import tools, hll, preparations
from modules.base.hll import union_hll, cardinality_hll
In [4]:
%load_ext autoreload
%autoreload 2
In [5]:
OUTPUT = Path.cwd().parents[0] / "out"       # output directory for figures (etc.)
WORK_DIR = Path.cwd().parents[0] / "tmp"     # working directory
In [6]:
(OUTPUT / "figures").mkdir(exist_ok=True)
(OUTPUT / "svg").mkdir(exist_ok=True)
WORK_DIR.mkdir(exist_ok=True)

Set global font

In [7]:
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = ['Times New Roman'] + plt.rcParams['font.serif']

Preview Hotspot regions

In [8]:
df_hotspots = gp.read_file(
    Path.cwd().parents[0] / "00_data" / "hotspots" / "hotspots2021.shp")
In [9]:
epsg_code = df_hotspots.crs.to_epsg()
print(epsg_code)
25832

Get Shapefile for Germany

In [10]:
source_zip = "https://daten.gdz.bkg.bund.de/produkte/vg/vg2500/aktuell/"
filename = "vg2500_12-31.utm32s.shape.zip"
shapes_name = "vg2500_12-31.utm32s.shape/vg2500/VG2500_LAN.shp"
In [11]:
SHAPE_DIR = (OUTPUT / "shapes")
SHAPE_DIR.mkdir(exist_ok=True)

if not (SHAPE_DIR / shapes_name).exists():
    tools.get_zip_extract(uri=source_zip, filename=filename, output_path=SHAPE_DIR)
else:
    print("Already exists")
Already exists
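The helper tools.get_zip_extract downloads the archive and unpacks it to the given folder. A minimal sketch of such a helper, assuming requests plus the standard library (the actual implementation in modules.base.tools may differ, e.g. in how it names the extraction folder):

import zipfile
import requests

def get_zip_extract_sketch(uri: str, filename: str, output_path: Path):
    """Download uri + filename and extract the zip archive to output_path
    (hypothetical re-implementation of tools.get_zip_extract)"""
    response = requests.get(f'{uri}{filename}', timeout=120)
    response.raise_for_status()
    zip_path = output_path / filename
    zip_path.write_bytes(response.content)
    with zipfile.ZipFile(zip_path) as zf:
        zf.extractall(output_path)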
In [12]:
shapes = gp.read_file(SHAPE_DIR / shapes_name)
shapes = shapes.to_crs(f"EPSG:{epsg_code}")

Create overlay

In [13]:
ax = shapes.plot(color='none', edgecolor='black', linewidth=0.2, figsize=(2, 4))
ax = df_hotspots.plot(ax=ax, color='green')
ax.set_axis_off()

Load HLL aggregate data

In [14]:
HOTSPOT_ALL = OUTPUT / "hotspot_all_months.csv"
METRIC = "postcount"
# METRIC = "usercount"
UPPER_YEAR = 2023
LOWER_YEAR = 2007

Preview

In [15]:
data_files = {
    "HOTSPOT_ALL":HOTSPOT_ALL,
    }
tools.display_file_stats(data_files)
name HOTSPOT_ALL
size 8.65 MB
records 12,814
In [16]:
df = pd.read_csv(HOTSPOT_ALL)
In [17]:
pd.options.display.width = 0
df.head(10)
Out[17]:
year month post_hll user_hll hotspot_id origin_id
0 2007 1 \x138b405601e3e1 \x138b4038856e82 1 2
1 2007 1 \x138b40004600e1046507a1090109210a210a410be110... \x138b4007a109e1124138853c4440414ae26022616179... 2 2
2 2007 1 \x138b404d218365 \x138b40ebc1 3 2
3 2007 1 \x138b40000301410241032404c105c3068306e109c20a... \x138b4009e11ee3218421a6228122e128412ea135e13a... 4 2
4 2007 1 \x138b4001c21162470156226e2178a18e41a6e1b661eee3 \x138b40e682 5 2
5 2007 1 \x138b4004630783148314e11ec226a329a32bc12da237... \x138b402fc136233da165a46d228b61b8c2c402df66ec82 6 2
6 2007 1 \x138b40012303a207a3090109610b030be30c8111011d... \x138b4003410781110115e11f022ea837a543a39883b9... 7 2
7 2007 1 \x138b40016303e105440c240e630f8111a113e2142215... \x138b407421c181efe1f1c1 8 2
8 2007 1 \x138b403ec63f6357c17f8281218c23a163a4a1cc01ee... \x138b4002611fa18ba39702d724 9 2
9 2007 1 \x138b4000610142018102c20901098210621181122216... \x138b4001a605821d011d83216222622fc23101388241... 10 2

Get distinct hotspot_ids, to check completeness:

In [18]:
print(sorted(df['hotspot_id'].unique()))
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]

Connect hll worker db

In [19]:
DB_USER = "hlluser"
DB_PASS = os.getenv('READONLY_USER_PASSWORD')
# set connection variables
DB_HOST = "hllworkerdb"
DB_PORT = "5432"
DB_NAME = "hllworkerdb"

Connect to empty Postgres database running HLL Extension:

In [20]:
DB_CONN = psycopg2.connect(
    host=DB_HOST,
    port=DB_PORT,
    dbname=DB_NAME,
    user=DB_USER,
    password=DB_PASS
)
DB_CONN.set_session(readonly=True)
DB_CALC = tools.DbConn(DB_CONN)
CUR_HLL = DB_CONN.cursor()
In [21]:
db_conn = tools.DbConn(DB_CONN)

Calculate HLL Cardinality per month

Define additional functions for reading and formatting CSV as pd.DataFrame

In [22]:
TIMESTAMP_FORMAT = '%Y %m'

def read_csv_datetime(csv: Path, timestamp_format: str = TIMESTAMP_FORMAT) -> pd.DataFrame:
    """Read CSV with parsing datetime index (months)
    
        First CSV column: Year
        Second CSV column: Month
    """
    date_cols = ["year", "month"]
    df = pd.read_csv(
        csv, index_col='datetime',
        parse_dates={'datetime': date_cols},
        date_format=timestamp_format,
        keep_date_col=True)  # boolean flag; year/month columns are dropped below
    df.drop(columns=date_cols, inplace=True)
    return df
    
def append_cardinality_df(df: pd.DataFrame, hll_col: str = "post_hll", cardinality_col: str = 'postcount_est'):
    """Calculate cardinality from HLL and append to extra column in df"""
    df[cardinality_col] = df.apply(
        lambda x: cardinality_hll(
           x[hll_col], CUR_HLL),
        axis=1)
    df.drop(columns=[hll_col], inplace=True)
    return df

def filter_fill_time(
        df: pd.DataFrame, min_year: int, 
        max_year: int, val_col: str = "postcount_est",
        min_month: int = 1, max_month: int = 1):
    """Filter time values between min - max year and fill missing values"""
    min_date = pd.Timestamp(f'{min_year}-{min_month}-01')
    max_date = pd.Timestamp(f'{max_year}-{max_month}-01')
    # clip by start and end date
    if min_date not in df.index:
        df.loc[min_date, val_col] = 0
    if max_date not in df.index:
        df.loc[max_date, val_col] = 0
    df.sort_index(inplace=True)
    # mask min and max time
    time_mask = ((df.index >= min_date) & (df.index <= max_date))
    # fill missing months with 0
    # this also sets the day to the last day of each month
    series = df.loc[time_mask, val_col].resample('M').sum().fillna(0)
    return series.to_frame()
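
A quick toy check of filter_fill_time with hypothetical data illustrates the resampling: the index snaps to month ends and months without observations become zero:

toy = pd.DataFrame(
    {"postcount_est": [5.0, 3.0]},
    index=[pd.Timestamp("2007-03-15"), pd.Timestamp("2007-06-01")])
filter_fill_time(toy, 2007, 2007, min_month=1, max_month=12).head(4)
# -> 2007-01-31: 0.0, 2007-02-28: 0.0, 2007-03-31: 5.0, 2007-04-30: 0.0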

Apply functions to all data sets.

  • Read from CSV
  • split by origin_id
  • calculate cardinality
  • merge year and month to single column
  • filter Jan 2007 - Jan 2023 range, fill missing values
In [23]:
df_post = read_csv_datetime(HOTSPOT_ALL)
In [24]:
source_names = {
    1: "Instagram", 2: "Flickr", 3: "Twitter", 23: "iNaturalist"}

Split by data source, create copies of the original dataframe, and process.

In [25]:
def process_df(
        df: pd.DataFrame, metric: str = METRIC, upper_year: int = UPPER_YEAR, lower_year: int = LOWER_YEAR):
    """Process df: append_cardinality() and filter_fill_time()"""
    if metric == "postcount":
        hll_col= "post_hll"
        cardinality_col = 'postcount_est'
    else:
        hll_col= "user_hll"
        cardinality_col = 'usercount_est'
    df = append_cardinality_df(df, hll_col, cardinality_col)
    df = filter_fill_time(df, lower_year, upper_year, cardinality_col)
    # fill where no data available
    df.fillna(0, inplace=True)
    return df
In [26]:
vis_df = []
for idx, name in source_names.items():
    sel_df = df_post[df_post["origin_id"]==idx].copy()
    vis_df.append(process_df(sel_df))
In [27]:
vis_df[0].head(5)
Out[27]:
postcount_est
datetime
2007-01-31 0.0
2007-02-28 0.0
2007-03-31 0.0
2007-04-30 0.0
2007-05-31 0.0
In [28]:
def plot_lines(
        df_list: List[pd.DataFrame],  
        xlegend: str = "Year", title: Optional[str] = None, metric: str = METRIC):
    """Plot lines from a list of DataFrames"""
    if metric == "postcount":
        hll_col: str = "post_hll"
        cardinality_col: str = 'postcount_est'
        ylegend: str = "Post count"
    else:
        hll_col: str = "user_hll"
        cardinality_col: str = 'usercount_est'
        ylegend: str = "User count"
    fig, ax = plt.subplots()
    fig.set_size_inches(15.7, 4.27)
    ylabel = f'{ylegend} (estimate)'
    for ix, df in enumerate(df_list):
        source_name = list(source_names.values())[ix]
        ax = df[cardinality_col].plot(ax=ax, kind='line', label=source_name)
    tick_loc = mticker.MultipleLocator(12)
    # x axis ticker formatting
    ax.xaxis.set_major_locator(tick_loc)
    ax.yaxis.set_major_formatter(mticker.StrMethodFormatter('{x:,.0f}'))
    ax.tick_params(axis='x', rotation=45, color='grey')
    ax.set(xlabel=xlegend, ylabel=ylabel)
    ax.spines["left"].set_linewidth(0.25)
    ax.spines["bottom"].set_linewidth(0.25)
    ax.spines["top"].set_linewidth(0)
    ax.spines["right"].set_linewidth(0)
    ax.yaxis.set_tick_params(width=0.5)
    # add legend
    h, l = ax.get_legend_handles_labels()
    ax.legend(h, l, frameon=False, loc='best')
    if title:
        ax.set_title(title)
In [29]:
plot_lines(vis_df)

Group temporal patterns by month

First, merge into single dataframe

In [30]:
stacked_df = pd.DataFrame()
for ix, df in enumerate(vis_df):
    source_name = list(source_names.values())[ix]
    stacked_df[source_name] = df
    stacked_df.index = df.index
In [31]:
stacked_df.head()
Out[31]:
Instagram Flickr Twitter iNaturalist
datetime
2007-01-31 0.0 2007.0 0.0 0.0
2007-02-28 0.0 1303.0 0.0 0.0
2007-03-31 0.0 1231.0 0.0 7.0
2007-04-30 0.0 3692.0 0.0 10.0
2007-05-31 0.0 3787.0 0.0 73.0
In [32]:
BAR_PARAM = {
    "width":1.0,
    "label":f"Total {METRIC} aggregated for months",
    "edgecolor":"white",
    "linewidth":0.5,
    "alpha":0.7,
}

def plot_bars(
        df: pd.DataFrame, ax: Optional[matplotlib.axes.Axes] = None, title: Optional[str] = None,
        ytitle: Optional[float] = None, padtitle: Optional[float] = None, legend: Optional[bool] = None,
        bar_param: Dict[str, Any] = BAR_PARAM, title_legend: Optional[str] = None,
        xlegend: Optional[float] = None, ylegend: Optional[float] = None, lang: Optional[str] = None):
    """Plot stacked bars from a DataFrame with multiple columns"""
    colors = sns.color_palette("vlag", as_cmap=True, n_colors=2)
    if lang is None:
        lang = "en"
    # create figure
    if not ax:
        fig, ax = plt.subplots(1, 1, figsize=(3, 1.5))
    colors = sns.color_palette("bright", as_cmap=True, n_colors=4)
    # plot
    # calculate mean:
    # exclude months with no data in mean calculation:
    df.replace(0, np.NaN) \
        .groupby(df.index.month, dropna=True) \
        .mean() \
        .plot.bar(ax=ax, stacked=True, color = colors, **bar_param)
    # format
    ax.set_xlim(-0.5,11.5)
    if lang == "en":
        month_names = ['Jan','Feb','Mar','Apr','May','Jun',
                   'Jul','Aug','Sep','Oct','Nov','Dec']
    elif lang == "de":
        month_names = ['Jan','Feb','Mär','Apr','Mai','Jun',
                       'Jul','Aug','Sep','Okt','Nov','Dez']
    ax.set_xticklabels(month_names)
    ax.tick_params(axis='x', rotation=45, length=0) # length: of ticks
    ax.spines["left"].set_linewidth(0.25)
    ax.spines["bottom"].set_linewidth(0.25)
    ax.spines["top"].set_linewidth(0)
    ax.spines["right"].set_linewidth(0)
    ax.yaxis.set_tick_params(width=0.5)
    ax.set(xlabel="", ylabel="")
    if legend: 
        if xlegend is None:
            xlegend = 1.0
        if ylegend is None:
            ylegend = 1
        ax.legend(
            bbox_to_anchor=(xlegend, ylegend), loc='upper left', 
            fontsize=8, frameon=False, title=title_legend, title_fontsize=8)
    else:
        ax.legend().set_visible(False)
    if not title:
        title = f"{METRIC.capitalize()} per month (mean)"
    if ytitle is None:
        ytitle =-0.2
    if padtitle is None:
        padtitle=-14
    ax.set_title(title, y=ytitle, pad=padtitle)
    for item in (
        [ax.xaxis.label, ax.title, ax.yaxis.label] +
         ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(8)
In [33]:
plot_bars(df=stacked_df, legend=True)
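
The replace(0, np.NaN) step in plot_bars matters: months without any data would otherwise drag the means down. A toy check with hypothetical values:

toy = pd.DataFrame(
    {"Flickr": [0.0, 4.0, 2.0]},
    index=pd.to_datetime(["2007-01-31", "2008-01-31", "2009-01-31"]))
toy.replace(0, np.nan).groupby(toy.index.month).mean()
# January mean: (4 + 2) / 2 = 3.0, not (0 + 4 + 2) / 3 = 2.0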

Visualize for all Hotspots separately.

In [34]:
hotspots = df_post['hotspot_id'].unique()
print(len(hotspots))
30

Get a summary of total user counts per hotspot, for sorting:

In [35]:
def replace_datetime_with_month(df: pd.DataFrame, group_by: str = "hotspot_id"):
    """Extract month from datetime index, set as new composite index
    together with the group_by column (default: hotspot_id)"""
    df.set_index([df.index.month, group_by], inplace=True)
    df.index.rename(("month", group_by), inplace=True)

Union the user_hll column, to get aggregate user counts

In [36]:
df_post = read_csv_datetime(HOTSPOT_ALL)
replace_datetime_with_month(df_post)
cardinality_series = tools.union_hll_series(
    hll_series=df_post["user_hll"],
    db_conn=db_conn, multiindex=True)
df = cardinality_series.to_frame()
df.rename(columns={"hll_cardinality":"user_count"}, inplace=True)
hotspots_sorted = df.unstack(level=1).fillna(0).droplevel(0, axis=1).sum().sort_values(ascending=False)
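tools.union_hll_series performs one union per (month, hotspot_id) group. A sketch of the idea, assuming the hll_union_agg aggregate of the postgresql-hll extension and a raw cursor instead of the DbConn wrapper (the real helper in modules.base.tools may differ):

def union_hll_series_sketch(hll_series: pd.Series, cur=CUR_HLL) -> pd.Series:
    """Group a series of hex-encoded HLL strings by its (multi)index,
    union each group in Postgres and return estimated cardinalities
    (hypothetical re-implementation of tools.union_hll_series)"""
    results = {}
    levels = list(range(hll_series.index.nlevels))
    for key, group in hll_series.groupby(level=levels):
        values_sql = ",".join(["(CAST(%s AS hll))"] * len(group))
        cur.execute(
            "SELECT hll_cardinality(hll_union_agg(s.hll))::int "
            f"FROM (VALUES {values_sql}) s(hll);", tuple(group))
        results[key] = cur.fetchone()[0]
    return pd.Series(results, name="hll_cardinality")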

Get hotspot labels

Preview ranking order

First: Get Hotspot names for x-labels

  • we also shorten a number of very long names
In [37]:
hotspot_names = {}
for idx in hotspots_sorted.index:
    hotspot_name = df_hotspots[df_hotspots["NUMMER"]==idx].NAME.iloc[0]
    hotspot_names[idx] = hotspot_name
    print(f'{idx} - {hotspot_name} - Usercount: {hotspots_sorted[idx]}')
hotspot_names[27] = "Schleswig-Holsteinische Ostseeküste"
hotspot_names[24] = "Untere Wümmeniederung mit Teufelsmoor"
hotspot_names[13] = "Saar-Ruwer-Hunsrück, Hoch- und Idarwald"
hotspot_names[23] = "Hunte-Leda-Moorniederung, Delmenhorster G."
hotspot_names[18] = "Südharzer Zechsteingürtel, Kyffhäuser und H."
2 - Ammergebirge, Niederwerdenfelser Land und Obere Isar - Usercount: 159119
4 - Ammer-Loisach-Hügelland und Lech-Vorberge - Usercount: 101458
10 - Nördliche Oberrheinebene mit Hardtplatten - Usercount: 80058
12 - Mittelrheintal mit den Seitentälern Nahe und Mosel - Usercount: 51771
19 - Harz - Usercount: 43781
6 - Schwarzwald mit Alb-Wutach-Gebiet - Usercount: 38691
7 - Schwäbische Alb - Usercount: 35382
27 - Schleswig-Holsteinische Ostseeküste mit Angeln, Schwansen und Dänischer Wohld - Usercount: 33830
16 - Thüringer Wald und nördliche Vorländer - Usercount: 27456
30 - Usedom und Ostvorpommersche Küste - Usercount: 26928
29 - Vorpommersche Boddenlandschaft und Rostocker Heide - Usercount: 25428
25 - Mecklenburgisch-Brandenburgisches Kleinseenland - Usercount: 24717
11 - Donnersberg, Pfälzerwald und Haardtrand - Usercount: 20936
8 - Hinterer Bayerischer Wald - Usercount: 16984
28 - Westmecklenburgische Ostseeküste und Lübecker Becken - Usercount: 16413
13 - Saar-Ruwer-Hunsrück, Hoch- und Idarwald und Oberes Nahebergland - Usercount: 15263
1 - Allgäuer Alpen - Usercount: 14186
20 - Oberlausitzer Heide- und Teichlandschaften - Usercount: 14052
22 - Südliches Emsland und nördliche westfälische Bucht - Usercount: 12809
5 - Oberschwäbisches Hügelland und Adelegg - Usercount: 10490
9 - Nördliche Frankenalb - Usercount: 10233
15 - Rhön - Usercount: 9083
18 - Südharzer Zechsteingürtel, Kyffhäuser und Hainleite - Usercount: 8969
26 - Schorfheide mit Neuenhagener Oderinsel - Usercount: 8656
24 - Untere Wümmeniederung mit Teufelsmoor und Wesermünder Geest - Usercount: 8043
3 - Alpenvorland zwischen Mangfall und Inn - Usercount: 7676
14 - Kalk- und Vulkaneifel - Usercount: 6918
17 - Werratal mit Hohem Meißner und Kaufunger Wald - Usercount: 6466
23 - Hunte-Leda-Moorniederung, Delmenhorster Geest und Hümmling - Usercount: 5513
21 - Senne mit angrenzendem Teutoburger Wald - Usercount: 3579
In [38]:
ax = hotspots_sorted.plot.bar(figsize=(6, 2), fontsize=8, color="r", **BAR_PARAM)
ax.spines["left"].set_linewidth(0.25)
ax.spines["bottom"].set_linewidth(0.25)
ax.spines["top"].set_linewidth(0)
ax.spines["right"].set_linewidth(0)
ax.set(xlabel="Hotspot ID", ylabel="Total User Count")
ax.xaxis.label.set_fontsize(8)
ax.yaxis.label.set_fontsize(8)
In [39]:
df_post = read_csv_datetime(HOTSPOT_ALL)
In [40]:
SUBPLOTS = len(hotspots_sorted) # 30 Subplots
In [41]:
hotspots_sorted
Out[41]:
hotspot_id
2     159119
4     101458
10     80058
12     51771
19     43781
6      38691
7      35382
27     33830
16     27456
30     26928
29     25428
25     24717
11     20936
8      16984
28     16413
13     15263
1      14186
20     14052
22     12809
5      10490
9      10233
15      9083
18      8969
26      8656
24      8043
3       7676
14      6918
17      6466
23      5513
21      3579
dtype: int64
In [42]:
hotspot_names
Out[42]:
{2: 'Ammergebirge, Niederwerdenfelser Land und Obere Isar',
 4: 'Ammer-Loisach-Hügelland und Lech-Vorberge',
 10: 'Nördliche Oberrheinebene mit Hardtplatten',
 12: 'Mittelrheintal mit den Seitentälern Nahe und Mosel',
 19: 'Harz',
 6: 'Schwarzwald mit Alb-Wutach-Gebiet',
 7: 'Schwäbische Alb',
 27: 'Schleswig-Holsteinische Ostseeküste',
 16: 'Thüringer Wald und nördliche Vorländer',
 30: 'Usedom und Ostvorpommersche Küste',
 29: 'Vorpommersche Boddenlandschaft und Rostocker Heide',
 25: 'Mecklenburgisch-Brandenburgisches Kleinseenland',
 11: 'Donnersberg, Pfälzerwald und Haardtrand',
 8: 'Hinterer Bayerischer Wald',
 28: 'Westmecklenburgische Ostseeküste und Lübecker Becken',
 13: 'Saar-Ruwer-Hunsrück, Hoch- und Idarwald',
 1: 'Allgäuer Alpen',
 20: 'Oberlausitzer Heide- und Teichlandschaften',
 22: 'Südliches Emsland und nördliche westfälische Bucht',
 5: 'Oberschwäbisches Hügelland und Adelegg',
 9: 'Nördliche Frankenalb',
 15: 'Rhön',
 18: 'Südharzer Zechsteingürtel, Kyffhäuser und H.',
 26: 'Schorfheide mit Neuenhagener Oderinsel',
 24: 'Untere Wümmeniederung mit Teufelsmoor',
 3: 'Alpenvorland zwischen Mangfall und Inn',
 14: 'Kalk- und Vulkaneifel',
 17: 'Werratal mit Hohem Meißner und Kaufunger Wald',
 23: 'Hunte-Leda-Moorniederung, Delmenhorster G.',
 21: 'Senne mit angrenzendem Teutoburger Wald'}
In [43]:
lang = "en"
In [44]:
# create figure object with multiple subplots
fig, axes = plt.subplots(nrows=int(round(SUBPLOTS/4)), ncols=4, figsize=(12, 18))
fig.subplots_adjust(hspace=.9) # adjust vertical space, to allow title below plot
# remove subplots beyond 30
fig.delaxes(axes[7][2])
fig.delaxes(axes[7][3])
# iterate hotspots
for ix, ax in enumerate(axes.reshape(-1)):
    if ix >= SUBPLOTS:
        break
    hotspot_id = list(hotspot_names.keys())[ix]
    hotspot_name = textwrap.fill(hotspot_names.get(hotspot_id), 20)
    # filter by hotspot_id and calculate cardinality
    df_post_filter = df_post[df_post["hotspot_id"]==hotspot_id].copy()
    stacked_df = pd.DataFrame()
    # process and stack into single df
    for idx, name in source_names.items():
        sel_df = df_post_filter[df_post_filter["origin_id"]==idx].copy()
        processed_df = process_df(sel_df)
        stacked_df[name] = processed_df
        stacked_df.index = processed_df.index        
    # plot bars individually
    plot_kwargs = {
        "ytitle":-0.65, 
        "padtitle":0, 
        "legend":False,
        "lang":lang
    }
    avrg_txt = "Average"
    if lang == "de":
        avrg_txt = "Durchschnitt"
    if ix == SUBPLOTS-1:
        # add legend on last subplot
        plot_kwargs["legend"] = True
        plot_kwargs["xlegend"] = 1.5
        plot_kwargs["title_legend"] = f'{METRIC.capitalize()} \n{avrg_txt} {LOWER_YEAR}-{UPPER_YEAR}'
    plot_bars(
        df=stacked_df, title=f'{hotspot_id}: {hotspot_name}', ax=ax, **plot_kwargs)

Save as svg and png

In [45]:
tools.save_fig(fig, output=OUTPUT, name=f"hotspots_months_{METRIC}")

Create a map with labels for the paper figure. Select Hotspots 2, 14, and 25.

In [46]:
df_sel = df_hotspots[df_hotspots["NUMMER"].isin([2, 14, 25])]
In [47]:
df_sel
Out[47]:
NAME FLAECHE GLS NUMMER FLAECHEQKM area_ha STATUS geometry
3 Ammergebirge, Niederwerdenfelser Land und Ober... 48785.121903 Alpen 2 487.851219 48785.12190 1 POLYGON ((661082.685 5274264.900, 661102.380 5...
8 Kalk- und Vulkaneifel 95624.595511 Westliches Mittelgebirge 14 956.245955 95624.59551 0 POLYGON ((343378.545 5608504.928, 343387.481 5...
9 Mecklenburgisch-Brandenburgisches Kleinseenland 230640.189646 Nordostdeutsches Tiefland 25 2516.392460 251638.81344 0 POLYGON ((746802.676 5937303.491, 747012.020 5...

Manually create label offsets and arc radii for the annotation arrows

In [48]:
label_off = {
    2:(0, 200000),
    14:(200000, 0),
    25:(-200000, 0)}
label_rad = {
    2:0.1,
    14:0.5,
    25:-0.3}
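tools.annotate_locations presumably maps these offsets and radii onto matplotlib's annotate with an arc3 connection style; a minimal sketch of that mapping, assuming polygon centroids as anchor points (the actual helper may differ):

def annotate_locations_sketch(
        gdf: gp.GeoDataFrame, ax, label_off: Dict, label_rad: Dict,
        text_col: str = "NUMMER", arrowstyle: str = '-',
        arrow_col: str = 'black', fontsize: int = 14):
    """Annotate each geometry's centroid with an offset label and curved arrow
    (hypothetical re-implementation of tools.annotate_locations)"""
    for _, row in gdf.iterrows():
        key = row[text_col]
        x, y = row.geometry.centroid.x, row.geometry.centroid.y
        dx, dy = label_off[key]
        ax.annotate(
            str(key), xy=(x, y), xytext=(x + dx, y + dy), fontsize=fontsize,
            arrowprops=dict(
                arrowstyle=arrowstyle, color=arrow_col,
                connectionstyle=f"arc3,rad={label_rad[key]}"))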
In [57]:
fig, ax = plt.subplots(1, 1, figsize=(2, 4))
ax = shapes.plot(ax=ax, color='none', edgecolor='black', linewidth=0.2)
ax = df_hotspots.plot(ax=ax, color='green')
ax = df_sel.plot(ax=ax, color='red')
tools.annotate_locations(
    gdf=df_sel, ax=ax, label_off=label_off, label_rad=label_rad, 
    text_col="NUMMER", arrowstyle='-', arrow_col='black', fontsize=14)
ax.set_axis_off()
In [58]:
tools.save_fig(fig, output=OUTPUT, name=f"overview_hotspots")

Create Release File

First, convert all svg to pdf, for archival purposes and paper submission.

In [63]:
WEB_DRIVER = preparations.load_chromedriver()
Chromedriver loaded. Svg output enabled.
In [67]:
%%time
tools.convert_svg_pdf(in_dir=OUTPUT / "svg", out_dir=OUTPUT / "pdf")
Processed 21 of 21 files..
CPU times: user 3.05 s, sys: 3.08 s, total: 6.14 s
Wall time: 35.7 s
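
tools.convert_svg_pdf presumably loads each svg in the headless Chrome instance and prints it to PDF; with Selenium 4, the Page.printToPDF CDP command would be one way to do this (a sketch under that assumption, not the actual implementation):

import base64

def convert_svg_pdf_sketch(in_dir: Path, out_dir: Path, driver=WEB_DRIVER):
    """Render each svg in headless Chrome and write it out as PDF
    (hypothetical re-implementation of tools.convert_svg_pdf)"""
    out_dir.mkdir(exist_ok=True)
    for svg in sorted(in_dir.glob("*.svg")):
        driver.get(svg.resolve().as_uri())
        pdf = driver.execute_cdp_cmd("Page.printToPDF", {"printBackground": True})
        (out_dir / f"{svg.stem}.pdf").write_bytes(base64.b64decode(pdf["data"]))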

Create release file with all results

Create a release file that contains the ipynb notebooks, HTML, figures, svg, and converted Python files.

Make sure that 7z is available (apt-get install p7zip-full)

In [72]:
!cd .. && git config --system --add safe.directory '*' \
    && RELEASE_VERSION=$(git describe --tags --abbrev=0) \
    && 7z a -tzip -mx=9 out/release_$RELEASE_VERSION.zip \
    md/* py/* out/*.csv resources/* out/pdf/* out/svg/* \
    out/figures/* notebooks/*.ipynb \
    README.md jupytext.toml nbconvert.tpl \
    -x!py/__pycache__ -x!py/modules/__pycache__ -x!py/modules/.ipynb_checkpoints \
    -y > /dev/null

Create notebook HTML

In [73]:
!jupyter nbconvert --to html_toc \
    --output-dir=../resources/html/ ./05_hotspots.ipynb \
    --template=../nbconvert.tpl \
    --ExtractOutputPreprocessor.enabled=False >&- 2>&-