Source code for full_dia.deepmall
import numpy as np
import pandas as pd
import torch
from full_dia import cfg, fxic, utils
from full_dia.log import Logger
try:
_ = profile
except NameError:
logger = Logger.get_logger()
[docs]
def extract_mall(
df_batch: pd.DataFrame,
map_gpu_ms1: dict,
map_gpu_ms2: dict,
tol_im: float,
tol_ppm: float,
) -> torch.Tensor:
"""
Extract top-12 fragment ions mall from ms.
Parameters
----------
df_batch : pd.DataFrame
Provide the pr info.
map_gpu_ms1 : dict
Provide the MS1 data.
map_gpu_ms2 : dict
Provide the MS2 data.
tol_im : float
Tolerance of ion mobility.
tol_ppm : float
Tolerance of ppm.
Returns
-------
Mall : torch.Tensor
Contain the fragment ions info:
pred_heights, xics, ppms, bias_ims, fg_type, SA, areas, snr
"""
# measure spectrum with smooth
locus, rts, ims, mzs, xics = fxic.extract_xics(
df_batch,
map_gpu_ms1,
map_gpu_ms2,
im_tolerance=tol_im,
ppm_tolerance=tol_ppm,
cycle_num=13,
)
# [n_pep, n_ion, n_cycle]
xics = fxic.gpu_simple_smooth(xics)
ims = utils.convert_numba_to_tensor(ims)[:, 2:, :]
mzs = utils.convert_numba_to_tensor(mzs)[:, 2:, :]
xics = utils.convert_numba_to_tensor(xics)[:, 2:, :]
center_idx = int((xics.shape[-1] - 1) / 2)
xics_mall = xics[:, :, (center_idx - 1) : (center_idx + 2)]
xics_mall = xics_mall.permute((0, 2, 1)) # [n_pep, n_cycle, n_ion]
xics_mall = xics_mall / (torch.amax(xics_mall, dim=-1, keepdim=True) + 1e-7)
# bias_im
ims = ims.permute((0, 2, 1))
ims = ims[:, (center_idx - 1) : (center_idx + 2), :]
pred_ims = df_batch["pred_im"].values
pred_ims = torch.from_numpy(pred_ims).to(cfg.gpu_id)
pred_ims = pred_ims.unsqueeze(-1).unsqueeze(-1).expand(ims.shape)
bias_ims = pred_ims - ims
bias_ims[ims < 0] = cfg.tol_im_xic
bias_ims = bias_ims / cfg.tol_im_xic
# ppm
mzs = mzs.permute((0, 2, 1))
mzs = mzs[:, (center_idx - 1) : (center_idx + 2), :]
cols_center = ["fg_mz_" + str(i) for i in range(cfg.fg_num)]
pred_mzs = df_batch[cols_center].values
pred_mzs = torch.from_numpy(pred_mzs).to(cfg.gpu_id)
pred_mzs = pred_mzs.unsqueeze(1).expand(mzs.shape)
ppms = 1e6 * (pred_mzs - mzs) / (pred_mzs + 1e-7)
ppms[mzs < 1] = cfg.tol_ppm
ppms = ppms / cfg.tol_ppm
# sa
cols = ["score_center_elution_" + str(i) for i in range(14)]
elutions = df_batch[cols].values[:, 2:]
elutions = torch.from_numpy(elutions).to(cfg.gpu_id)
elutions = elutions.unsqueeze(1)
# area
locus_start_v = df_batch["score_elute_span_left"].values
locus_end_v = df_batch["score_elute_span_right"].values
xics = xics.cpu().numpy()
mask1 = np.arange(xics.shape[2]) >= locus_start_v[:, None, None]
mask2 = np.arange(xics.shape[2]) <= locus_end_v[:, None, None]
xics = xics * mask1 * mask2
rts = np.repeat(rts[:, np.newaxis, :], xics.shape[1], axis=1)
areas = np.trapz(xics, x=rts, axis=2)
areas = areas / (areas.max(axis=1, keepdims=True) + 1e-7)
areas = torch.from_numpy(areas).to(cfg.gpu_id)
areas = areas.unsqueeze(1)
# pred intensities
cols_height = ["fg_height_" + str(i) for i in range(cfg.fg_num)]
pred_heights = df_batch[cols_height].values
pred_heights = torch.from_numpy(pred_heights).to(cfg.gpu_id)
pred_heights = pred_heights.unsqueeze(1)
# ion type
cols_anno = ["fg_anno_" + str(i) for i in range(cfg.fg_num)]
fg_type = df_batch[cols_anno].values // 1000
fg_type = torch.from_numpy(fg_type.astype(np.float32)).to(cfg.gpu_id)
fg_type = fg_type.unsqueeze(1)
# snr
cols = ["score_center_snr_" + str(i) for i in range(14)]
snr = df_batch[cols].values[:, 2:]
snr = torch.from_numpy(snr).to(cfg.gpu_id)
snr = snr.unsqueeze(1)
mall = torch.cat(
[pred_heights, xics_mall, ppms, bias_ims, fg_type, elutions, areas, snr], dim=1
)
return mall
[docs]
def scoring_mall(
model_mall: torch.nn.Module,
df_input: pd.DataFrame,
map_gpu_ms1: dict,
map_gpu_ms2: dict,
tol_im: float,
tol_ppm: float,
) -> tuple:
"""
Extract and score the Malls for elution groups.
Parameters
----------
model_mall: torch.nn.Module
The trained DeepMall model.
df_input : pd.DataFrame
Provide the pr info.
map_gpu_ms1 : dict
Provide the MS1 data.
map_gpu_ms2 : dict
Provide the MS2 data.
tol_im : float
Tolerance of ion mobility.
tol_ppm : float
Tolerance of ppm.
Returns
-------
tuple
pred : np.ndarray
The scores by DeepMall.
feature : np.ndarray
The features by DeepMall.
"""
mall = extract_mall(df_input, map_gpu_ms1, map_gpu_ms2, tol_im, tol_ppm)
valid_ion_nums = df_input["fg_num"].values
valid_ion_nums = torch.from_numpy(valid_ion_nums).long().to(cfg.gpu_id)
with torch.no_grad():
feature, pred = model_mall(mall, valid_ion_nums)
pred = torch.softmax(pred, 1)
pred = pred[:, 1].cpu().numpy()
feature = feature.cpu().numpy()
return pred, feature