Add DNA and RNA utilities for biological computations
Some checks failed
mint_ci / Check Python code using ruff (push) Successful in 17s
Rust-lint / Run rust tests (push) Failing after 55s
Rust-lint / Check Rust code with rustfmt and clippy (push) Failing after 44s

- Introduced a new `dna` module for DNA representation and functionality, including conversion from RNA and complement methods.
- Updated the `rna` module to include complement and reverse complement methods, along with a new `RNACodon` struct for RNA codon representation.
- Refactored translation functionality to utilize `RNACodon` instead of the previous `Codon` struct.
- Added unit tests for new functionalities in both `dna` and `rna` modules to ensure correctness.
- Updated the biology module to include the new `dna` module.
This commit is contained in:
2025-05-15 08:47:44 +09:00
parent 02fc9d5915
commit 8a99c29fd1
6 changed files with 555 additions and 89 deletions

View File

@@ -0,0 +1,125 @@
use crate::utils::biology::{
aa::AminoAcid, rna::RNA, translation::TranslationResult, translation::translate,
};
use std::collections::HashSet;
use std::iter::DoubleEndedIterator;
/// Collects the nested open reading frames contained in `orf`.
///
/// For every methionine found at index 1 or later, the suffix of `orf` that
/// starts at that methionine is emitted. Index 0 is not considered; the
/// suffix starting there is `orf` itself.
///
/// Returns `None` when there is no such methionine, otherwise the suffixes
/// ordered by their start position in `orf`.
fn find_sub_orf(orf: Vec<AminoAcid>) -> Option<Vec<Vec<AminoAcid>>> {
    let list_orf: Vec<Vec<AminoAcid>> = orf
        .iter()
        .enumerate()
        .skip(1)
        .filter(|&(_, aa)| matches!(aa, AminoAcid::Methionine))
        .map(|(start, _)| orf[start..].to_vec())
        .collect();
    if list_orf.is_empty() {
        None
    } else {
        Some(list_orf)
    }
}
/// Extracts every open reading frame from a stream of translated codons.
///
/// A frame opens at the first methionine seen while no frame is open and is
/// completed by the next stop codon. Residues seen while no frame is open are
/// dropped, and a frame that is still open when the input ends is discarded.
/// In the output, each completed frame is immediately followed by the nested
/// frames that `find_sub_orf` reports for it.
///
/// Returns `None` when no frame was completed.
fn find_orf<T>(translation_iter: T) -> Option<Vec<Vec<AminoAcid>>>
where
    T: Iterator<Item = TranslationResult>,
{
    let mut completed: Vec<Vec<AminoAcid>> = Vec::new();
    // `Some(frame)` while a frame is open, `None` otherwise.
    let mut open_frame: Option<Vec<AminoAcid>> = None;
    for translation_result in translation_iter {
        match translation_result {
            TranslationResult::AminoAcid(AminoAcid::Methionine) => {
                // Opens a frame if none is open; otherwise it is an ordinary residue.
                open_frame
                    .get_or_insert_with(Vec::new)
                    .push(AminoAcid::Methionine);
            }
            TranslationResult::AminoAcid(aa) => {
                if let Some(frame) = open_frame.as_mut() {
                    frame.push(aa);
                }
            }
            TranslationResult::StopCodon => {
                // `take` closes the frame so the next methionine starts a new one.
                if let Some(frame) = open_frame.take() {
                    completed.push(frame);
                }
            }
        }
    }

    let mut extended_list_orf = Vec::with_capacity(completed.len());
    for orf in completed {
        extended_list_orf.push(orf.clone());
        if let Some(sub_orf) = find_sub_orf(orf) {
            extended_list_orf.extend(sub_orf);
        }
    }

    if extended_list_orf.is_empty() {
        None
    } else {
        Some(extended_list_orf)
    }
}
/// Finds the open reading frames of a nucleotide sequence on both strands.
///
/// Every item of `rna_iter` is converted to [`RNA`]. For each offset 0, 1 and
/// 2 the sequence is translated after skipping that many bases, first as given
/// and then as its reverse complement, and the frames found by `find_orf` are
/// appended to the result in that order.
///
/// The input is cloned once per strand and offset, which is why `T: Clone`
/// is required.
pub fn find_all_orf<T, V>(rna_iter: T) -> Vec<Vec<AminoAcid>>
where
    T: IntoIterator<Item = V> + Clone,
    V: Into<RNA>,
    <T as IntoIterator>::IntoIter: DoubleEndedIterator,
{
    let mut found = Vec::new();
    for offset in 0..3 {
        let forward = rna_iter.clone().into_iter().map(Into::<RNA>::into);
        found.extend(find_orf(translate(forward.skip(offset))).unwrap_or_default());

        let reverse =
            RNA::reverse_complement(rna_iter.clone().into_iter().map(Into::<RNA>::into));
        found.extend(find_orf(translate(reverse.skip(offset))).unwrap_or_default());
    }
    found
}
/// Returns the distinct open reading frames of a sequence as strings.
///
/// Each frame reported by [`find_all_orf`] is rendered by concatenating the
/// `Display` output of its amino acids; identical strings collapse into a
/// single set entry.
pub fn find_all_orf_strings<T, V>(rna_iter: T) -> HashSet<String>
where
    T: IntoIterator<Item = V> + Clone,
    V: Into<RNA>,
    <T as IntoIterator>::IntoIter: DoubleEndedIterator,
{
    let mut peptides = HashSet::new();
    for orf in find_all_orf(rna_iter) {
        let mut peptide = String::new();
        for aa in orf {
            peptide.push_str(&aa.to_string());
        }
        peptides.insert(peptide);
    }
    peptides
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::biology::dna::DNA;

    /// Checks the set of frames found in a fixed DNA string against the
    /// expected protein strings.
    #[test]
    fn test_find_orf() {
        let dna_str = "AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG";
        let expected_result = [
            "MLLGSFRLIPKETLIQVAGSSPCNLS",
            "M",
            "MGMTPRLGLESLLE",
            "MTPRLGLESLLE",
        ];

        let orf = find_all_orf_strings(dna_str.chars().map(DNA::from));

        assert_eq!(orf.len(), expected_result.len());
        for er in expected_result.iter() {
            assert!(orf.contains(&er.to_string()));
        }
    }
}

155
src/utils/biology/dna.rs Normal file
View File

@@ -0,0 +1,155 @@
use crate::utils::biology::rna::RNA;
use std::convert::From;
use std::fmt::{self, Display};
/// A single DNA nucleotide.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum DNA {
    Adenine,
    Cytosine,
    Guanine,
    Thymine,
}

/// A strand direction carrying a `usize` payload.
///
/// NOTE(review): the payload is presumably the reading-frame offset; nothing
/// in this file uses the type yet, so confirm against its callers.
pub enum DNAFrame {
    Forward(usize),
    Reverse(usize),
}

impl Display for DNA {
    /// Writes the one-letter code of the nucleotide (`A`, `C`, `G` or `T`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Formatting `self` with `{}` here would call this very method again
        // and recurse until the stack overflows, so map to the letter directly.
        let symbol = match self {
            DNA::Adenine => 'A',
            DNA::Cytosine => 'C',
            DNA::Guanine => 'G',
            DNA::Thymine => 'T',
        };
        write!(f, "{}", symbol)
    }
}
impl From<char> for DNA {
fn from(c: char) -> Self {
match c {
'A' => DNA::Adenine,
'C' => DNA::Cytosine,
'G' => DNA::Guanine,
'T' => DNA::Thymine,
_ => panic!("Invalid DNA character: {}", c),
}
}
}
impl From<DNA> for char {
    /// Returns the one-letter code of the nucleotide.
    ///
    /// Implemented as `From` so that the standard blanket impl also provides
    /// `Into<char> for DNA`; existing `dna.into()` call sites keep working.
    fn from(dna: DNA) -> Self {
        match dna {
            DNA::Adenine => 'A',
            DNA::Cytosine => 'C',
            DNA::Guanine => 'G',
            DNA::Thymine => 'T',
        }
    }
}
impl DNA {
pub fn complement(&self) -> DNA {
match self {
DNA::Adenine => DNA::Thymine,
DNA::Cytosine => DNA::Guanine,
DNA::Guanine => DNA::Cytosine,
DNA::Thymine => DNA::Adenine,
}
}
}
impl From<RNA> for DNA {
    /// Maps an RNA base to the DNA base of the same name, with uracil
    /// becoming thymine.
    fn from(base: RNA) -> Self {
        match base {
            RNA::Uracil => Self::Thymine,
            RNA::Adenine => Self::Adenine,
            RNA::Cytosine => Self::Cytosine,
            RNA::Guanine => Self::Guanine,
        }
    }
}
impl From<DNA> for RNA {
    /// Maps a DNA base to the RNA base of the same name, with thymine
    /// becoming uracil.
    fn from(base: DNA) -> Self {
        match base {
            DNA::Thymine => Self::Uracil,
            DNA::Adenine => Self::Adenine,
            DNA::Cytosine => Self::Cytosine,
            DNA::Guanine => Self::Guanine,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every valid letter maps to its variant.
    #[test]
    fn test_dna_from_char() {
        let cases = [
            ('A', DNA::Adenine),
            ('C', DNA::Cytosine),
            ('G', DNA::Guanine),
            ('T', DNA::Thymine),
        ];
        for (letter, expected) in cases {
            assert_eq!(DNA::from(letter), expected);
        }
    }

    /// Every variant converts back to its letter.
    #[test]
    fn test_dna_into_char() {
        let cases = [
            (DNA::Adenine, 'A'),
            (DNA::Cytosine, 'C'),
            (DNA::Guanine, 'G'),
            (DNA::Thymine, 'T'),
        ];
        for (dna, expected) in cases {
            let c: char = dna.into();
            assert_eq!(c, expected);
        }
    }

    /// RNA bases convert to DNA, with uracil becoming thymine.
    #[test]
    fn test_dna_from_rna() {
        let cases = [
            (RNA::Adenine, DNA::Adenine),
            (RNA::Cytosine, DNA::Cytosine),
            (RNA::Guanine, DNA::Guanine),
            (RNA::Uracil, DNA::Thymine),
        ];
        for (rna, expected) in cases {
            let dna: DNA = rna.into();
            assert_eq!(dna, expected);
        }
    }

    /// DNA bases convert to RNA, with thymine becoming uracil.
    #[test]
    fn test_rna_from_dna() {
        let cases = [
            (DNA::Adenine, RNA::Adenine),
            (DNA::Cytosine, RNA::Cytosine),
            (DNA::Guanine, RNA::Guanine),
            (DNA::Thymine, RNA::Uracil),
        ];
        for (dna, expected) in cases {
            let rna: RNA = dna.into();
            assert_eq!(rna, expected);
        }
    }
}

View File

@@ -1,4 +1,5 @@
pub mod aa;
pub mod dna;
pub mod motif;
pub mod rna;
pub mod translation;

View File

@@ -39,23 +39,58 @@ impl Into<char> for RNA {
}
}
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct Codon(pub(crate) RNA, pub(crate) RNA, pub(crate) RNA);
impl RNA {
pub fn complement(&self) -> RNA {
match self {
RNA::Adenine => RNA::Uracil,
RNA::Cytosine => RNA::Guanine,
RNA::Guanine => RNA::Cytosine,
RNA::Uracil => RNA::Adenine,
}
}
impl Codon {
pub fn new(a: RNA, b: RNA, c: RNA) -> Self {
Self(a, b, c)
pub fn reverse_complement<T>(rna_iter: T) -> impl Iterator<Item = RNA>
where
T: Iterator<Item = Self> + DoubleEndedIterator,
{
rna_iter.rev().map(|rna| rna.complement())
}
}
impl Display for Codon {
/// Three consecutive [`RNA`] bases.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct RNACodon(pub(crate) RNA, pub(crate) RNA, pub(crate) RNA);

impl RNACodon {
    /// Builds a codon from its three bases, in order.
    pub fn new(a: RNA, b: RNA, c: RNA) -> Self {
        RNACodon(a, b, c)
    }

    /// Converts every item of `s` to [`RNA`] and groups the bases into
    /// consecutive codons.
    ///
    /// The whole input is collected first. One or two trailing bases that do
    /// not fill a codon are dropped.
    pub fn from_iter<T, V>(s: T) -> Vec<Self>
    where
        T: IntoIterator<Item = V>,
        V: Into<RNA>,
    {
        let bases: Vec<RNA> = s.into_iter().map(Into::into).collect();
        let mut codons = Vec::with_capacity(bases.len() / 3);
        for triplet in bases.chunks_exact(3) {
            codons.push(Self::new(triplet[0], triplet[1], triplet[2]));
        }
        codons
    }

    /// Splits the characters of `s` into codons via [`RNACodon::from_iter`].
    ///
    /// NOTE(review): behaviour for characters that are not RNA letters depends
    /// on the `char` to `RNA` conversion, which is not visible here.
    pub fn from_str(s: &str) -> Vec<Self> {
        let symbols = s.chars();
        Self::from_iter(symbols)
    }
}
impl Display for RNACodon {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s: String = (*self).into();
write!(f, "{}", s)
}
}
impl TryFrom<String> for Codon {
impl TryFrom<String> for RNACodon {
type Error = ();
fn try_from(s: String) -> Result<Self, Self::Error> {
@@ -68,7 +103,7 @@ impl TryFrom<String> for Codon {
}
}
impl Into<String> for Codon {
impl Into<String> for RNACodon {
fn into(self) -> String {
format!("{}{}{}", self.0, self.1, self.2)
}
@@ -77,7 +112,7 @@ impl Into<String> for Codon {
#[cfg(test)]
mod tests {
use super::*;
use crate::utils::biology::dna::DNA;
#[test]
fn test_rna_into_char() {
let rna = RNA::Adenine;
@@ -117,17 +152,72 @@ mod tests {
}
#[test]
fn test_codon_try_from_string() {
let codon = Codon::try_from("AAA".to_string());
assert_eq!(codon, Ok(Codon(RNA::Adenine, RNA::Adenine, RNA::Adenine)));
let codon = Codon::try_from("CCC".to_string());
fn test_rnacodon_try_from_string() {
let codon = RNACodon::try_from("AAA".to_string());
assert_eq!(
codon,
Ok(Codon(RNA::Cytosine, RNA::Cytosine, RNA::Cytosine))
Ok(RNACodon(RNA::Adenine, RNA::Adenine, RNA::Adenine))
);
let codon = Codon::try_from("GGG".to_string());
assert_eq!(codon, Ok(Codon(RNA::Guanine, RNA::Guanine, RNA::Guanine)));
let codon = RNACodon::try_from("CCC".to_string());
assert_eq!(
codon,
Ok(RNACodon(RNA::Cytosine, RNA::Cytosine, RNA::Cytosine))
);
let codon = RNACodon::try_from("GGG".to_string());
assert_eq!(
codon,
Ok(RNACodon(RNA::Guanine, RNA::Guanine, RNA::Guanine))
);
}
/// Splitting a repeated `AGU` string at offsets 0, 1 and 2 yields the
/// shifted codons; incomplete trailing bases are ignored.
#[test]
fn test_rna_iterator() {
    let rna_string = "AGUAGUAGUAGU".to_string();
    let agu = RNACodon::new(RNA::Adenine, RNA::Guanine, RNA::Uracil);
    let gua = RNACodon::new(RNA::Guanine, RNA::Uracil, RNA::Adenine);
    let uag = RNACodon::new(RNA::Uracil, RNA::Adenine, RNA::Guanine);

    assert_eq!(RNACodon::from_str(&rna_string), vec![agu; 4]);
    assert_eq!(RNACodon::from_str(&rna_string[1..]), vec![gua; 3]);
    assert_eq!(RNACodon::from_str(&rna_string[2..]), vec![uag; 3]);
}
/// DNA bases are converted to RNA (T becomes U) while being grouped into codons.
#[test]
fn test_rna_from_dna_iter() {
    let dna_string = "AGCTAGCTAGCT".to_string();
    let expected = vec![
        RNACodon::new(RNA::Adenine, RNA::Guanine, RNA::Cytosine),
        RNACodon::new(RNA::Uracil, RNA::Adenine, RNA::Guanine),
        RNACodon::new(RNA::Cytosine, RNA::Uracil, RNA::Adenine),
        RNACodon::new(RNA::Guanine, RNA::Cytosine, RNA::Uracil),
    ];

    let rna_codons = RNACodon::from_iter(dna_string.chars().map(DNA::from));

    assert_eq!(rna_codons, expected);
}
}

View File

@@ -1,5 +1,5 @@
use crate::utils::biology::aa::AminoAcid;
use crate::utils::biology::rna::{Codon, RNA};
use crate::utils::biology::rna::{RNA, RNACodon};
use std::convert::From;
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
@@ -8,134 +8,134 @@ pub enum TranslationResult {
StopCodon,
}
impl From<Codon> for TranslationResult {
fn from(codon: Codon) -> Self {
impl From<RNACodon> for TranslationResult {
fn from(codon: RNACodon) -> Self {
match codon {
// Phenylalanine (F)
Codon(RNA::Uracil, RNA::Uracil, RNA::Uracil)
| Codon(RNA::Uracil, RNA::Uracil, RNA::Cytosine) => {
RNACodon(RNA::Uracil, RNA::Uracil, RNA::Uracil)
| RNACodon(RNA::Uracil, RNA::Uracil, RNA::Cytosine) => {
TranslationResult::AminoAcid(AminoAcid::Phenylalanine)
}
// Leucine (L)
Codon(RNA::Uracil, RNA::Uracil, RNA::Adenine)
| Codon(RNA::Uracil, RNA::Uracil, RNA::Guanine)
| Codon(RNA::Cytosine, RNA::Uracil, RNA::Uracil)
| Codon(RNA::Cytosine, RNA::Uracil, RNA::Cytosine)
| Codon(RNA::Cytosine, RNA::Uracil, RNA::Adenine)
| Codon(RNA::Cytosine, RNA::Uracil, RNA::Guanine) => {
RNACodon(RNA::Uracil, RNA::Uracil, RNA::Adenine)
| RNACodon(RNA::Uracil, RNA::Uracil, RNA::Guanine)
| RNACodon(RNA::Cytosine, RNA::Uracil, RNA::Uracil)
| RNACodon(RNA::Cytosine, RNA::Uracil, RNA::Cytosine)
| RNACodon(RNA::Cytosine, RNA::Uracil, RNA::Adenine)
| RNACodon(RNA::Cytosine, RNA::Uracil, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Leucine)
}
// Isoleucine (I)
Codon(RNA::Adenine, RNA::Uracil, RNA::Uracil)
| Codon(RNA::Adenine, RNA::Uracil, RNA::Cytosine)
| Codon(RNA::Adenine, RNA::Uracil, RNA::Adenine) => {
RNACodon(RNA::Adenine, RNA::Uracil, RNA::Uracil)
| RNACodon(RNA::Adenine, RNA::Uracil, RNA::Cytosine)
| RNACodon(RNA::Adenine, RNA::Uracil, RNA::Adenine) => {
TranslationResult::AminoAcid(AminoAcid::Isoleucine)
}
// Methionine (M) / Start
Codon(RNA::Adenine, RNA::Uracil, RNA::Guanine) => {
RNACodon(RNA::Adenine, RNA::Uracil, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Methionine)
}
// Valine (V)
Codon(RNA::Guanine, RNA::Uracil, RNA::Uracil)
| Codon(RNA::Guanine, RNA::Uracil, RNA::Cytosine)
| Codon(RNA::Guanine, RNA::Uracil, RNA::Adenine)
| Codon(RNA::Guanine, RNA::Uracil, RNA::Guanine) => {
RNACodon(RNA::Guanine, RNA::Uracil, RNA::Uracil)
| RNACodon(RNA::Guanine, RNA::Uracil, RNA::Cytosine)
| RNACodon(RNA::Guanine, RNA::Uracil, RNA::Adenine)
| RNACodon(RNA::Guanine, RNA::Uracil, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Valine)
}
// Serine (S)
Codon(RNA::Uracil, RNA::Cytosine, RNA::Uracil)
| Codon(RNA::Uracil, RNA::Cytosine, RNA::Cytosine)
| Codon(RNA::Uracil, RNA::Cytosine, RNA::Adenine)
| Codon(RNA::Uracil, RNA::Cytosine, RNA::Guanine)
| Codon(RNA::Adenine, RNA::Guanine, RNA::Uracil)
| Codon(RNA::Adenine, RNA::Guanine, RNA::Cytosine) => {
RNACodon(RNA::Uracil, RNA::Cytosine, RNA::Uracil)
| RNACodon(RNA::Uracil, RNA::Cytosine, RNA::Cytosine)
| RNACodon(RNA::Uracil, RNA::Cytosine, RNA::Adenine)
| RNACodon(RNA::Uracil, RNA::Cytosine, RNA::Guanine)
| RNACodon(RNA::Adenine, RNA::Guanine, RNA::Uracil)
| RNACodon(RNA::Adenine, RNA::Guanine, RNA::Cytosine) => {
TranslationResult::AminoAcid(AminoAcid::Serine)
}
// Proline (P)
Codon(RNA::Cytosine, RNA::Cytosine, RNA::Uracil)
| Codon(RNA::Cytosine, RNA::Cytosine, RNA::Cytosine)
| Codon(RNA::Cytosine, RNA::Cytosine, RNA::Adenine)
| Codon(RNA::Cytosine, RNA::Cytosine, RNA::Guanine) => {
RNACodon(RNA::Cytosine, RNA::Cytosine, RNA::Uracil)
| RNACodon(RNA::Cytosine, RNA::Cytosine, RNA::Cytosine)
| RNACodon(RNA::Cytosine, RNA::Cytosine, RNA::Adenine)
| RNACodon(RNA::Cytosine, RNA::Cytosine, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Proline)
}
// Threonine (T)
Codon(RNA::Adenine, RNA::Cytosine, RNA::Uracil)
| Codon(RNA::Adenine, RNA::Cytosine, RNA::Cytosine)
| Codon(RNA::Adenine, RNA::Cytosine, RNA::Adenine)
| Codon(RNA::Adenine, RNA::Cytosine, RNA::Guanine) => {
RNACodon(RNA::Adenine, RNA::Cytosine, RNA::Uracil)
| RNACodon(RNA::Adenine, RNA::Cytosine, RNA::Cytosine)
| RNACodon(RNA::Adenine, RNA::Cytosine, RNA::Adenine)
| RNACodon(RNA::Adenine, RNA::Cytosine, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Threonine)
}
// Alanine (A)
Codon(RNA::Guanine, RNA::Cytosine, RNA::Uracil)
| Codon(RNA::Guanine, RNA::Cytosine, RNA::Cytosine)
| Codon(RNA::Guanine, RNA::Cytosine, RNA::Adenine)
| Codon(RNA::Guanine, RNA::Cytosine, RNA::Guanine) => {
RNACodon(RNA::Guanine, RNA::Cytosine, RNA::Uracil)
| RNACodon(RNA::Guanine, RNA::Cytosine, RNA::Cytosine)
| RNACodon(RNA::Guanine, RNA::Cytosine, RNA::Adenine)
| RNACodon(RNA::Guanine, RNA::Cytosine, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Alanine)
}
// Tyrosine (Y)
Codon(RNA::Uracil, RNA::Adenine, RNA::Uracil)
| Codon(RNA::Uracil, RNA::Adenine, RNA::Cytosine) => {
RNACodon(RNA::Uracil, RNA::Adenine, RNA::Uracil)
| RNACodon(RNA::Uracil, RNA::Adenine, RNA::Cytosine) => {
TranslationResult::AminoAcid(AminoAcid::Tyrosine)
}
// Histidine (H)
Codon(RNA::Cytosine, RNA::Adenine, RNA::Uracil)
| Codon(RNA::Cytosine, RNA::Adenine, RNA::Cytosine) => {
RNACodon(RNA::Cytosine, RNA::Adenine, RNA::Uracil)
| RNACodon(RNA::Cytosine, RNA::Adenine, RNA::Cytosine) => {
TranslationResult::AminoAcid(AminoAcid::Histidine)
}
// Glutamine (Q)
Codon(RNA::Cytosine, RNA::Adenine, RNA::Adenine)
| Codon(RNA::Cytosine, RNA::Adenine, RNA::Guanine) => {
RNACodon(RNA::Cytosine, RNA::Adenine, RNA::Adenine)
| RNACodon(RNA::Cytosine, RNA::Adenine, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Glutamine)
}
// Asparagine (N)
Codon(RNA::Adenine, RNA::Adenine, RNA::Uracil)
| Codon(RNA::Adenine, RNA::Adenine, RNA::Cytosine) => {
RNACodon(RNA::Adenine, RNA::Adenine, RNA::Uracil)
| RNACodon(RNA::Adenine, RNA::Adenine, RNA::Cytosine) => {
TranslationResult::AminoAcid(AminoAcid::Asparagine)
}
// Lysine (K)
Codon(RNA::Adenine, RNA::Adenine, RNA::Adenine)
| Codon(RNA::Adenine, RNA::Adenine, RNA::Guanine) => {
RNACodon(RNA::Adenine, RNA::Adenine, RNA::Adenine)
| RNACodon(RNA::Adenine, RNA::Adenine, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Lysine)
}
// Aspartic Acid (D)
Codon(RNA::Guanine, RNA::Adenine, RNA::Uracil)
| Codon(RNA::Guanine, RNA::Adenine, RNA::Cytosine) => {
RNACodon(RNA::Guanine, RNA::Adenine, RNA::Uracil)
| RNACodon(RNA::Guanine, RNA::Adenine, RNA::Cytosine) => {
TranslationResult::AminoAcid(AminoAcid::AsparticAcid)
}
// Glutamic Acid (E)
Codon(RNA::Guanine, RNA::Adenine, RNA::Adenine)
| Codon(RNA::Guanine, RNA::Adenine, RNA::Guanine) => {
RNACodon(RNA::Guanine, RNA::Adenine, RNA::Adenine)
| RNACodon(RNA::Guanine, RNA::Adenine, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::GlutamicAcid)
}
// Cysteine (C)
Codon(RNA::Uracil, RNA::Guanine, RNA::Uracil)
| Codon(RNA::Uracil, RNA::Guanine, RNA::Cytosine) => {
RNACodon(RNA::Uracil, RNA::Guanine, RNA::Uracil)
| RNACodon(RNA::Uracil, RNA::Guanine, RNA::Cytosine) => {
TranslationResult::AminoAcid(AminoAcid::Cysteine)
}
// Tryptophan (W)
Codon(RNA::Uracil, RNA::Guanine, RNA::Guanine) => {
RNACodon(RNA::Uracil, RNA::Guanine, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Tryptophan)
}
// Arginine (R)
Codon(RNA::Cytosine, RNA::Guanine, RNA::Uracil)
| Codon(RNA::Cytosine, RNA::Guanine, RNA::Cytosine)
| Codon(RNA::Cytosine, RNA::Guanine, RNA::Adenine)
| Codon(RNA::Cytosine, RNA::Guanine, RNA::Guanine)
| Codon(RNA::Adenine, RNA::Guanine, RNA::Adenine)
| Codon(RNA::Adenine, RNA::Guanine, RNA::Guanine) => {
RNACodon(RNA::Cytosine, RNA::Guanine, RNA::Uracil)
| RNACodon(RNA::Cytosine, RNA::Guanine, RNA::Cytosine)
| RNACodon(RNA::Cytosine, RNA::Guanine, RNA::Adenine)
| RNACodon(RNA::Cytosine, RNA::Guanine, RNA::Guanine)
| RNACodon(RNA::Adenine, RNA::Guanine, RNA::Adenine)
| RNACodon(RNA::Adenine, RNA::Guanine, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Arginine)
}
// Glycine (G)
Codon(RNA::Guanine, RNA::Guanine, RNA::Uracil)
| Codon(RNA::Guanine, RNA::Guanine, RNA::Cytosine)
| Codon(RNA::Guanine, RNA::Guanine, RNA::Adenine)
| Codon(RNA::Guanine, RNA::Guanine, RNA::Guanine) => {
RNACodon(RNA::Guanine, RNA::Guanine, RNA::Uracil)
| RNACodon(RNA::Guanine, RNA::Guanine, RNA::Cytosine)
| RNACodon(RNA::Guanine, RNA::Guanine, RNA::Adenine)
| RNACodon(RNA::Guanine, RNA::Guanine, RNA::Guanine) => {
TranslationResult::AminoAcid(AminoAcid::Glycine)
}
// Stop Codons
Codon(RNA::Uracil, RNA::Adenine, RNA::Adenine)
| Codon(RNA::Uracil, RNA::Adenine, RNA::Guanine)
| Codon(RNA::Uracil, RNA::Guanine, RNA::Adenine) => TranslationResult::StopCodon,
RNACodon(RNA::Uracil, RNA::Adenine, RNA::Adenine)
| RNACodon(RNA::Uracil, RNA::Adenine, RNA::Guanine)
| RNACodon(RNA::Uracil, RNA::Guanine, RNA::Adenine) => TranslationResult::StopCodon,
}
}
}
@@ -165,9 +165,20 @@ pub fn count_possible_mrna_from_aa(aa: AminoAcid) -> u64 {
}
}
/// Translates a nucleotide sequence codon by codon.
///
/// The input is first grouped into codons by `RNACodon::from_iter`; the
/// returned iterator then maps each codon to its [`TranslationResult`].
pub fn translate<T, V>(rna_iter: T) -> impl Iterator<Item = TranslationResult>
where
    T: IntoIterator<Item = V>,
    V: Into<RNA>,
{
    let codons = RNACodon::from_iter(rna_iter);
    codons.into_iter().map(TranslationResult::from)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::utils::biology::dna::DNA;
use itertools::Itertools;
#[test]
@@ -179,7 +190,7 @@ mod tests {
.map(|v| v.into_iter().collect::<String>())
.collect();
for codon_str in codon_list {
let codon = Codon::try_from(codon_str.clone()).unwrap();
let codon = RNACodon::try_from(codon_str.clone()).unwrap();
let translation_result = TranslationResult::from(codon);
match codon_str.as_str() {
// Phenylalanine (F)
@@ -331,4 +342,92 @@ mod tests {
}
}
}
/// Translates one RNA string in all three forward frames and compares each
/// result with the expected one-letter sequence (`-` marks a stop codon).
#[test]
fn test_translate_rna_string() {
    /// Asserts that the next items of `results` match `expected`, letter by letter.
    fn expect_next<I>(results: &mut I, expected: &str)
    where
        I: Iterator<Item = TranslationResult>,
    {
        for symbol in expected.chars() {
            let wanted = match symbol {
                '-' => TranslationResult::StopCodon,
                residue => TranslationResult::AminoAcid(residue.into()),
            };
            assert_eq!(results.next(), Some(wanted));
        }
    }

    let rna_str = "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA";

    // Frame 0: the protein followed by the terminating stop codon.
    let mut frame0 = translate(rna_str.chars());
    expect_next(&mut frame0, "MAMAPRTEINSTRING");
    assert_eq!(frame0.next(), Some(TranslationResult::StopCodon));

    // Frame 1: no stop codon; the iterator ends after the last full codon.
    let mut frame1 = translate(rna_str[1..].chars());
    expect_next(&mut frame1, "WPWRPELRSIVPVLTG");
    assert_eq!(frame1.next(), None);

    // Frame 2: stop codons are interleaved with residues.
    let mut frame2 = translate(rna_str[2..].chars());
    expect_next(&mut frame2, "GHGAQN-DQ-YPY-RV");
    assert_eq!(frame2.next(), None);
}
/// Translates one DNA string in all three forward frames and compares each
/// result with the expected one-letter sequence (`-` marks a stop codon).
#[test]
fn test_translate_dna_string() {
    /// Asserts that the next items of `results` match `expected`, letter by letter.
    fn expect_next<I>(results: &mut I, expected: &str)
    where
        I: Iterator<Item = TranslationResult>,
    {
        for symbol in expected.chars() {
            let wanted = match symbol {
                '-' => TranslationResult::StopCodon,
                residue => TranslationResult::AminoAcid(residue.into()),
            };
            assert_eq!(results.next(), Some(wanted));
        }
    }

    let dna_str = "ATGGCCATGGCGCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA";

    // Frame 0: the protein followed by the terminating stop codon.
    let mut frame0 = translate(dna_str.chars().map(DNA::from));
    expect_next(&mut frame0, "MAMAPRTEINSTRING");
    assert_eq!(frame0.next(), Some(TranslationResult::StopCodon));

    // Frame 1: no stop codon; the iterator ends after the last full codon.
    let mut frame1 = translate(dna_str[1..].chars().map(DNA::from));
    expect_next(&mut frame1, "WPWRPELRSIVPVLTG");
    assert_eq!(frame1.next(), None);

    // Frame 2: stop codons are interleaved with residues.
    let mut frame2 = translate(dna_str[2..].chars().map(DNA::from));
    expect_next(&mut frame2, "GHGAQN-DQ-YPY-RV");
    assert_eq!(frame2.next(), None);
}
}

View File

@@ -4,10 +4,6 @@ use ureq::get;
const UNIPROT_URL: &str = "http://rest.uniprot.org/uniprotkb";
pub fn get_primary_accession_from_uniprot(uniprot_id: &str) -> Result<String, String> {
todo!()
}
pub fn get_sequence_from_uniprot(uniprot_id: &str) -> Result<String, String> {
let url = format!("{}/{}.fasta", UNIPROT_URL, uniprot_id);