% Coombes et al. (2024), BMC Biology 22. Repository export, cleaned up:
% the data-availability / funding paragraph is stored in `annote` (not typeset
% by standard styles) rather than `note`, and the acronym in the title is
% brace-protected against style recasing.
@article{c7f8f286884d4d92bb85817f7988c895,
  author    = {Coombes, Benedict and Lux, Thomas and Akhunov, Eduard and Hall, Anthony},
  title     = {Introgressions lead to reference bias in wheat {RNA-seq} analysis},
  journal   = {BMC Biology},
  volume    = {22},
  year      = {2024},
  month     = mar,
  day       = {7},
  doi       = {10.1186/s12915-024-01853-w},
  issn      = {1741-7007},
  publisher = {BioMed Central},
  language  = {English},
  keywords  = {Genomics, Introgressions, Polyploidy, Reference bias, RNA-seq, Wheat},
  abstract  = {Background: RNA-seq is a fundamental technique in genomics, yet reference bias, where transcripts derived from non-reference alleles are quantified less accurately, can undermine the accuracy of RNA-seq quantification and thus the conclusions made downstream. Reference bias in RNA-seq analysis has yet to be explored in complex polyploid genomes despite evidence that they are often a complex mosaic of wild relative introgressions, which introduce blocks of highly divergent genes. Results: Here we use hexaploid wheat as a model complex polyploid, using both simulated and experimental data to show that RNA-seq alignment in wheat suffers from widespread reference bias which is largely driven by divergent introgressed genes. This leads to underestimation of gene expression and incorrect assessment of homoeologue expression balance. By incorporating gene models from ten wheat genome assemblies into a pantranscriptome reference, we present a novel method to reduce reference bias, which can be readily scaled to capture more variation as new genome and transcriptome data becomes available. Conclusions: This study shows that the presence of introgressions can lead to reference bias in wheat RNA-seq analysis. Caution should be exercised by researchers using non-sample reference genomes for RNA-seq alignment and novel methods, such as the one presented here, should be considered.},
  annote    = {Availability of data and materials: The pantranscriptome reference, along with a python script to sum expression counts across all transcripts of a given Chinese Spring gene and its 1-to-1 orthologues, can be accessed via figshare at https://doi.org/10.6084/m9.figshare.24242767 [53]. The RNA-seq data and DNA sequencing data generated by He et al. [11] are stored in the European Nucleotide Archive under project codes PRJNA670223 [54] and PRJNA787276 [55]. The wheat cultivar genomes and annotations generated as part of the 10+ wheat genomes project [20] can be accessed on Ensembl Plants release 58 via https://plants.ensembl.org/Triticum\_aestivum/Info/Cultivars [56]. Funding Information: BC was supported by the BBSRC funded Norwich Research Park Biosciences Doctoral Training Partnership grant BB/M011216/1. AH was supported by the Biotechnology and Biological Sciences Research Council (BBSRC), part of UK Research and Innovation; Earlham Institute Strategic Programme Grant BB/X011089/1 and BBS/E/ER/230002B (Decode WP2 Genome Enabled Analysis of Diversity to Identify Gene Function, Biosynthetic Pathways And Variation In Agri/Aquacultural Traits). EA is supported by the Agriculture and Food Research Initiative Competitive Grants 2022-68013-36439 (WheatCAP) and grant INV-004430 from Bill and Melinda Gates Foundation.},
}