Preprocessing
[1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from cartopy import crs as ccrs, feature as cfeature
from tqdm import tqdm
import requests
import io
import tempfile
import arviz as az
import importlib
from paleokalmag import PaleoKalmag
from paleokalmag.data_handling import read_data
# load ArchKalmag14k.r model
r = requests.get(
"https://nextcloud.gfz-potsdam.de/s/BqN2XD7c9jK3AsF/download", stream=True
)
archkalmag = PaleoKalmag(io.BytesIO(r.raw.read()), name="ArchKalmag14k.r", legacy=True)
# Alternatively, you can download the model and save it locally
# archkalmag = PaleoKalmag(
# "path/to/ArchKalmag14k.r.npz", name="ArchKalmag14k.r"
# )
from age_depth_modeling.evaluate_adm import evaluate_adm
from constants import mad_to_alpha_factors, default_alpha95
from calibrate_dec import calibrate
from plot_functions import plot_adm, plot_D_I
pre = "https://nextcloud.gfz-potsdam.de/s/"
post = "/download"
adm_links = {
"BA3": f"{pre}WQaSYe9eNffzyZJ{post}",
"BYE": f"{pre}qH5Zc685D8g9BYb{post}",
"ByaP2": f"{pre}LAgSbXkkDKjcgrC{post}",
"ByaP3": f"{pre}tEipBed36aYASP3{post}",
"EIL": f"{pre}JitiXQXPrjbjRRe{post}",
"FUR": f"{pre}mSMrTBnmWZ8wCsz{post}",
"P2": f"{pre}jYd7ka4ncWeiF6e{post}",
"P3": f"{pre}p8yWE8SZYgWQg7i{post}",
"GYL": f"{pre}5pD7kZE5gGK68kF{post}",
"GP1": f"{pre}JEdgZXQXS3dMdcT{post}",
"GP2": f"{pre}8XHGZFCpL3xRW2g{post}",
"GP4": f"{pre}DSJXNmH9xqPY6di{post}",
"MOR": f"{pre}oKSjPZN7BgxdZF6{post}",
"U1305": f"{pre}7Seyd4w44sfkZcC{post}",
"BIR_unflat": f"{pre}sAnaqtoJrSd9S2X{post}",
}
/tmp/ipykernel_1762/3593069225.py:1: DeprecationWarning:
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
import pandas as pd
cpu
[2]:
core_ids = [
"BA3",
"BYE",
# "ByaP2",
# "ByaP3",
# "EIL",
# "FUR",
# "P2",
# "P3",
# "GYL",
# "GP1",
# "GP2",
# "GP4",
# "MOR",
# "U1305",
# "BIR_unflat",
]
cores = {}
for core_id in tqdm(core_ids):
print(core_id)
lib = importlib.import_module(core_id + "." + core_id)
df = lib.paleomag_data.copy()
demag_steps = lib.demag_steps
try:
df.loc[lib.remove_D, "D"] = np.nan
df.loc[lib.remove_I, "I"] = np.nan
except AttributeError:
pass
df.dropna(subset=["D", "I"], how="all", inplace=True)
df.reset_index(drop=True, inplace=True)
df["lon"].where(df["lon"] > 0, other=df["lon"] + 360, inplace=True)
if "MAD" in df.columns:
df["alpha_95"] = mad_to_alpha_factors[str(demag_steps)] * df["MAD"]
else:
df["MAD"] = np.nan
if "alpha_95" not in df.columns:
df["alpha_95"] = np.nan
df["alpha_95"].where(
df["alpha_95"].notna(), other=default_alpha95, inplace=True
)
df["dI"] = (57.3 / 140) * df["alpha_95"]
df["dD"] = np.where(
df["I"].notna(),
df["dI"] / np.cos(np.deg2rad(df["I"])),
df["dI"] / np.cos(np.arctan(2 * np.tan(np.deg2rad(df["lat"])))),
)
df["type"] = "sediment"
try:
# idata = az.InferenceData.from_netcdf(
# f"../../dat/age_depth_models/{core_id}_adm.nc"
# )
r = requests.get(adm_links[core_id])
with tempfile.NamedTemporaryFile(mode='w+b', delete=False) as temp_file:
temp_file.write(r.content)
idata = az.from_netcdf(temp_file.name)
t, dt = evaluate_adm(idata, df["depth"].values)
df["t"] = t
df["dt"] = dt
except FileNotFoundError:
df["t"] = np.nan
df["dt"] = np.nan
df["D_rel"] = df["D"]
for key, group in df.groupby("core"):
try:
cal_mean, cal_std = calibrate(
group[group["t"] >= -12000], archkalmag
)
print(f"{core_id} - {key}: {cal_mean:.2f} +/- {cal_std:.2f}")
df.loc[df["core"] == key, "D"] = (
df.loc[df["core"] == key, "D"] - cal_mean
)
except:
cal_mean, cal_std = calibrate(
group[group["t_old"] >= -12000], archkalmag
)
print(f"{core_id} - {key}: {cal_mean:.2f} +/- {cal_std:.2f}")
df.loc[df["core"] == key, "D"] = (
df.loc[df["core"] == key, "D"] - cal_mean
)
df["D"].where(df["D"] <= 180, df["D"] - 360, inplace=True)
df = df[
[
"depth",
"t",
"dt",
"t_old",
"lat",
"lon",
"D",
"D_rel",
"I",
"dD",
"dI",
"MAD",
"core",
"type",
"UID",
"FID",
"alpha_95",
]
]
df.sort_values("t", inplace=True)
df.reset_index(drop=True, inplace=True)
cores[core_id] = df
- more-to-come:
- class:
stderr
- 0%| | 0/2 [00:00<?, ?it/s]
</pre>
- 0%| | 0/2 [00:00<?, ?it/s]
end{sphinxVerbatim}
0%| | 0/2 [00:00<?, ?it/s]
BA3
/tmp/ipykernel_1762/11818589.py:31: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
/tmp/ipykernel_1762/11818589.py:38: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
/opt/conda/envs/virtualenv/lib/python3.10/site-packages/arviz/data/inference_data.py:153: UserWarning: adm_pars group is not defined in the InferenceData scheme
BA3 - GEO17603-3-1: -2.40 +/- 0.39
- more-to-come:
- class:
stderr
/tmp/ipykernel_1762/11818589.py:80: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method. The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy. For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
- 50%|█████ | 1/2 [00:04<00:04, 4.68s/it]
</pre>
- 50%|█████ | 1/2 [00:04<00:04, 4.68s/it]
end{sphinxVerbatim}
50%|█████ | 1/2 [00:04<00:04, 4.68s/it]
BA3 - GEO17603-3-2: -25.95 +/- 0.57
BYE
/tmp/ipykernel_1762/11818589.py:31: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
/tmp/ipykernel_1762/11818589.py:38: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
/opt/conda/envs/virtualenv/lib/python3.10/site-packages/arviz/data/inference_data.py:153: UserWarning: adm_pars group is not defined in the InferenceData scheme
BYE - ByaP2: -1.05 +/- 0.90
/tmp/ipykernel_1762/11818589.py:80: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
- 100%|██████████| 2/2 [00:07<00:00, 3.60s/it]
</pre>
- 100%|██████████| 2/2 [00:07<00:00, 3.60s/it]
end{sphinxVerbatim}
100%|██████████| 2/2 [00:07<00:00, 3.60s/it]
- 100%|██████████| 2/2 [00:07<00:00, 3.76s/it]
</pre>
- 100%|██████████| 2/2 [00:07<00:00, 3.76s/it]
end{sphinxVerbatim}
100%|██████████| 2/2 [00:07<00:00, 3.76s/it]
BYE - ByaP3: 0.05 +/- 1.02
[3]:
for i, (id, core) in enumerate(cores.items()):
fig, axs = plt.subplot_mosaic(
[
4 * ["D"],
4 * ["I"],
],
figsize=(10, 3),
gridspec_kw=dict(hspace=0, wspace=0.8),
dpi=300,
)
plot_D_I(
axs["D"],
axs["I"],
core,
time_or_depth="depth",
archkalmag14k=archkalmag,
limit_y_to_data=True,
)
fig.align_ylabels()
fig.align_xlabels()
plt.show()
[4]:
proj = ccrs.Mollweide()
fig = plt.figure(figsize=(10, 4), dpi=400)
sph_ax = plt.subplot(projection=proj)
sph_ax.set_global()
sph_ax.add_feature(cfeature.LAND, zorder=0, color="lightgray")
with np.load("../../dat/rejection_list.npz", allow_pickle=True) as fh:
to_reject = fh["to_reject"]
arch_data = read_data("../../dat/archkalmag_data.csv", rejection_lists=to_reject)
sph_ax.scatter(
arch_data.lon,
arch_data.lat,
color="gray",
s=10,
alpha=0.8,
lw=0,
rasterized=True,
transform=ccrs.PlateCarree(),
label="Archaoemagnetic data"
)
for i, (core_id, core) in enumerate(cores.items()):
if core_id in ["BIR_unflat", "ByaP2", "ByaP3", "GP1", "GP2", "GP4", "P2", "P3"]:
continue
label = f"{core_id}"
for j, (key, group) in enumerate(core.groupby("core")):
sph_ax.scatter(
group.lon,
group.lat,
color=plt.get_cmap("turbo", len(cores.values()))(i),
label=label if j == len(core["core"].unique()) - 1 else None,
s=70,
marker="*",
rasterized=True,
transform=ccrs.PlateCarree(),
)
sph_ax.legend(
loc="center", prop={"size": 6}, bbox_to_anchor=(0.5, -0.05), ncol=11
)
fig.autofmt_xdate()
fig.tight_layout()
Rejected 1150 outliers.
/opt/conda/envs/virtualenv/lib/python3.10/site-packages/paleokalmag/data_handling.py:160: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
/opt/conda/envs/virtualenv/lib/python3.10/site-packages/paleokalmag/data_handling.py:166: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
/opt/conda/envs/virtualenv/lib/python3.10/site-packages/paleokalmag/data_handling.py:180: UserWarning: Records with indices [ 72 73 74 75 76 77 78 79 80 81 108 109
356 357 1064 1065 1152 1613 2399 2572 2668 2701 2750 2753
2754 3032 3211 3213 3214 3215 3216 3217 3218 3219 3220 3221
3499 3768 3984 4043 4231 4411 4500 4526 4527 4651 5168 5322
5430 5547 5703 5759 5798 5801 5955 6049 6101 6563 6643 6702
6777 6805 7145 7576 7581 7589 7688 7698 7731 7772 7802 7822
7877 7975 8063 8292 8323 8423 8506 8666 8704 8748 9220 10128
10327 10505 10598 10600 10643 11523 11713 11754] contain declination, but not inclination! The errors need special treatment!
To be able to use the provided data, these records have been dropped from the output.
/opt/conda/envs/virtualenv/lib/python3.10/site-packages/paleokalmag/data_handling.py:189: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
/opt/conda/envs/virtualenv/lib/python3.10/site-packages/paleokalmag/data_handling.py:197: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
/opt/conda/envs/virtualenv/lib/python3.10/site-packages/paleokalmag/data_handling.py:198: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
/opt/conda/envs/virtualenv/lib/python3.10/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_land.zip
[5]:
for core_id, core in cores.items():
core[
[
"depth",
"t",
"dt",
"lat",
"lon",
"D",
"I",
"dD",
"dI",
"type",
"core",
"UID",
"FID",
]
]#.to_csv(f"../../dat/preprocessed_data/{core_id}.csv", index=False)