Files
code_problems/src/rosalind/finding_protein_motif.rs
Myeongseon Choi b25ed7e13d
Some checks failed
mint_ci / Check Python code using ruff (push) Successful in 19s
Rust-lint / Run rust tests (push) Failing after 55s
Rust-lint / Check Rust code with rustfmt and clippy (push) Failing after 36s
Add new problem modules and utility functions for Euler and Rosalind challenges
- Introduced `prob700`, `prob808`, and `prob816` modules for new problem implementations in the Project Euler series.
- Added utility functions for calculating Euler coins and their sums, as well as methods for finding reversible primes and calculating shortest distances in a modular context.
- Updated the `mod.rs` files to include the new problem modules in both the Project Euler and Rosalind sections.
- Enhanced the `integer` and `modulo` utility modules with new functions for coprimality checks and modular multiplication.
- Included unit tests for the new functionalities to ensure correctness and reliability.
- Refactored existing tests for consistency in the `finding_protein_motif` module.
2025-05-23 11:12:37 +09:00

93 lines
3.0 KiB
Rust

use crate::utils::biology::{motif::Motif, uniprot::get_sequence_from_uniprot};
use std::thread::sleep;
use std::time::Duration;
/// Locates every occurrence of `motif` in `protein`.
///
/// `motif` is parsed with [`Motif::from_str`]; the offsets reported by
/// `Motif::find_matches` are each shifted up by one before being returned, so a match at
/// the very start of `protein` is reported as position `1`.
///
/// # Panics
///
/// Panics if `Motif::from_str` rejects `motif`.
pub fn find_protein_motif(protein: &str, motif: &str) -> Vec<usize> {
    let pattern = Motif::from_str(motif).unwrap();
    let mut positions = Vec::new();
    for offset in pattern.find_matches(protein) {
        // Convert the matcher's offset into a 1-based position.
        positions.push(offset + 1);
    }
    positions
}
/// Looks up the sequence for `uniprot_id` via [`get_sequence_from_uniprot`] and returns the
/// positions at which `motif` occurs in it, as computed by [`find_protein_motif`].
///
/// The retrieved sequence is also written to standard output.
///
/// # Panics
///
/// Panics if the sequence lookup fails, or if [`find_protein_motif`] panics.
pub fn find_protein_motif_in_uniprot(uniprot_id: &str, motif: &str) -> Vec<usize> {
    let fetched = get_sequence_from_uniprot(uniprot_id).unwrap();
    // NOTE(review): this prints the whole sequence on every call; it looks like leftover
    // debugging output. Confirm whether any caller relies on it before removing.
    println!("{}", fetched);
    find_protein_motif(&fetched, motif)
}
/// Runs [`find_protein_motif_in_uniprot`] for every entry in `inputs` and formats the hits
/// as text.
///
/// `inputs` is a whitespace-separated list of entries: spaces, tabs and newlines all act
/// as separators, and runs of whitespace (including leading/trailing whitespace) never
/// produce blank entries. An entry may carry a suffix after an underscore (for example
/// `P07204_TRBM_HUMAN`); only the part before the first `_` is used for the lookup, while
/// the entry exactly as given is echoed in the output.
///
/// For each entry with at least one match the result contains two lines: the entry
/// itself, then its match positions separated by single spaces. Entries without matches
/// are omitted. Lines are joined with `\n` and there is no trailing newline; if nothing
/// matched (or `inputs` holds no entries) the result is the empty string.
///
/// # Panics
///
/// Panics whenever [`find_protein_motif_in_uniprot`] panics for one of the entries.
pub fn find_protein_motif_in_uniprot_list(inputs: &str, motif: &str) -> String {
    let mut result = Vec::new();
    // `split_whitespace` (rather than splitting on a single space) keeps empty tokens from
    // reaching the lookup when the list is empty, newline-separated, or padded.
    for input in inputs.split_whitespace() {
        // `split` always yields at least one item, so the fallback is never taken; it only
        // avoids an `unwrap`.
        let uniprot_id = input.split('_').next().unwrap_or(input);
        let positions = find_protein_motif_in_uniprot(uniprot_id, motif);
        if !positions.is_empty() {
            let joined = positions
                .iter()
                .map(|p| p.to_string())
                .collect::<Vec<String>>()
                .join(" ");
            result.push(format!("{}\n{}", input, joined));
        }
        // Short pause after each lookup — presumably to avoid sending requests to the
        // sequence service back-to-back; confirm the intended rate limit.
        sleep(Duration::from_millis(10));
    }
    result.join("\n")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `find_protein_motif` on in-memory sequences: a bracket/brace pattern with
    /// no hit, a literal motif with two hits, and a motif whose matches overlap.
    #[test]
    fn test_find_protein_motif() {
        let cases: [(&str, &str, Vec<usize>); 3] = [
            (
                "MRASRPVVHPVEAPPPAALAVAAAAVAVEAGVGAGGGAAAHGGENAQPRGVRMKDPPGAPGTPGGLGLRLVQAFFAAAALAVMASTDDFPSVSAFCYLVAAAILQCLWSLSLAVVDIYALLVKRSLRNPQAVCIFTIGDGITGTLTLGAACASAGITVLIGNDLNICANNHCASFETATAMAFISWFALAPSCVLNFWSMASR",
                "N{P}[ST]{P}",
                vec![],
            ),
            ("GCATGATACATG", "CAT", vec![2, 9]),
            // Overlapping occurrences are each reported.
            ("AAAAA", "AA", vec![1, 2, 3, 4]),
        ];

        for (protein, motif, expected) in cases.iter() {
            let positions = find_protein_motif(protein, motif);
            assert_eq!(positions, *expected);
        }
    }

    /// Exercises `find_protein_motif_in_uniprot` for four access IDs, pausing briefly
    /// between consecutive lookups.
    ///
    /// NOTE(review): this presumably needs the live sequence service to be reachable —
    /// confirm against `get_sequence_from_uniprot`.
    #[test]
    fn test_find_protein_motif_in_uniprot() {
        let motif = "N{P}[ST]{P}";
        let cases: [(&str, Vec<usize>); 4] = [
            ("A2Z669", vec![]),
            ("B5ZC00", vec![85, 118, 142, 306, 395]),
            ("P07204", vec![47, 115, 116, 382, 409]),
            (
                "P20840",
                vec![79, 109, 135, 248, 306, 348, 364, 402, 485, 501, 614],
            ),
        ];

        for (index, (uniprot_id, expected)) in cases.iter().enumerate() {
            // Pause between lookups only; there is no pause before the first one or
            // after the last one.
            if index > 0 {
                sleep(Duration::from_millis(10));
            }
            let positions = find_protein_motif_in_uniprot(uniprot_id, motif);
            assert_eq!(positions, *expected);
        }
    }
}