% Rahman et al. (2022), Annals of Applied Statistics 16(2): snbClass paper.
% Cleaned from a repository auto-export:
%   - title re-entered in Title Case (was ALL CAPS); RNA-seq is brace-protected
%   - author names normalised to "Last, First"
%   - "Hseih" corrected to "Hsieh" -- NOTE(review): confirm against the DOI record
%   - PDF hyphenation artifacts removed from abstract and note
%   - ampersand in note escaped; verbatim-duplicated funding paragraph dropped
@article{0b0ee3274e194dd1a1ba78967ff43ad4,
  author    = {Rahman, Tanbin and Huang, {Hsin En} and Li, Yujia and Tai, {An Shun} and Hsieh, {Wen Ping} and McClung, {Colleen A.} and Tseng, George},
  title     = {A Sparse Negative Binomial Classifier with Covariate Adjustment for {RNA-seq} Data},
  journal   = {Annals of Applied Statistics},
  year      = {2022},
  month     = jun,
  volume    = {16},
  number    = {2},
  pages     = {1071--1089},
  doi       = {10.1214/21-AOAS1532},
  issn      = {1932-6157},
  publisher = {Institute of Mathematical Statistics},
  language  = {English},
  abstract  = {Supervised machine learning methods have been increasingly used in biomedical research and clinical practice. In transcriptomic applications, RNA-seq data have become dominating and have gradually replaced traditional microarray, due to their reduced background noise and increased digital precision. Most existing machine learning methods are, however, designed for continuous intensities of microarray and are not suitable for RNA-seq count data. In this paper we develop a negative binomial model via generalized linear model framework with double regularization for gene and covariate sparsity to accommodate three key elements: adequate modeling of count data with overdispersion, gene selection and adjustment for covariate effect. The proposed sparse negative binomial classifier (snbClass) is evaluated in simulations and two real applications of multidisease postmortem brain tissue RNA-seq data and cervical tumor miRNA-seq data to demonstrate its superior performance in prediction accuracy and feature selection.},
  note      = {Funding Information: Funding. Data were generated as part of the CommonMind Consortium supported by funding from Takeda Pharmaceuticals Company Limited, F. Hoffman-La Roche Ltd and NIH grants R01MH085542, R01MH093725, P50MH066392, P50MH080405, R01MH097276, RO1MH075916, P50M096891, P50MH084053S1, R37MH057881, AG02219, MH06692, R01MH110921, R01MH109677, R01MH109897, U01MH103392, AG05138 and contract HHSN271201300031C through IRP NIMH. TR, YL and GT are funded by NIH grant R01CA190766 and R21LM012752. GT and CM are funded by NIH R01MH111601 and P50DA046346. HH, AT and WH are funded by MOST 107-2118-M-007-001. Funding Information: The multidisease postmortem brain RNA-seq dataset was obtained from NIMH Repository \& Genomics Resource, a centralized national biorepository for genetic studies of psychiatric disorders. Brain tissue for the study was obtained from the following brain bank collections: the Mount Sinai NIH Brain and Tissue Repository, the University of Pennsylvania Alzheimers Disease Core Center, the University of Pittsburgh NeuroBioBank and Brain and Tissue Repositories and the NIMH Human Brain Collection Core. CMC Leadership: Panos Roussos, Joseph Buxbaum, Andrew Chess, Schahram Akbarian, Vahram Haroutunian (Icahn School of Medicine at Mount Sinai), Bernie Devlin, David Lewis (University of Pittsburgh), Raquel Gur, Chang-Gyu Hahn (University of Pennsylvania), Enrico Domenici (University of Trento), Mette A. Peters, Solveig Sieberts (Sage Bionetworks), Thomas Lehner, Stefano Marenco, Barbara K. Lipska (NIMH). All correspondences for the manuscript should be addressed to George Tseng. Publisher Copyright: {\textcopyright} Institute of Mathematical Statistics, 2022.},
}