generated from mschoi/template
- Introduced `prob700`, `prob808`, and `prob816` modules for new problem implementations in the Project Euler series. - Added utility functions for calculating Euler coins and their sums, as well as methods for finding reversible primes and calculating shortest distances in a modular context. - Updated the `mod.rs` files to include the new problem modules in both the Project Euler and Rosalind sections. - Enhanced the `integer` and `modulo` utility modules with new functions for coprimality checks and modular multiplication. - Included unit tests for the new functionalities to ensure correctness and reliability. - Refactored existing tests for consistency in the `finding_protein_motif` module.
93 lines
3.0 KiB
Rust
93 lines
3.0 KiB
Rust
use crate::utils::biology::{motif::Motif, uniprot::get_sequence_from_uniprot};
|
|
use std::thread::sleep;
|
|
use std::time::Duration;
|
|
|
|
pub fn find_protein_motif(protein: &str, motif: &str) -> Vec<usize> {
|
|
let motif = Motif::from_str(motif).unwrap();
|
|
motif
|
|
.find_matches(protein)
|
|
.into_iter()
|
|
.map(|p| p + 1)
|
|
.collect()
|
|
}
|
|
|
|
pub fn find_protein_motif_in_uniprot(uniprot_id: &str, motif: &str) -> Vec<usize> {
|
|
let sequence = get_sequence_from_uniprot(uniprot_id).unwrap();
|
|
println!("{}", sequence);
|
|
find_protein_motif(&sequence, motif)
|
|
}
|
|
|
|
pub fn find_protein_motif_in_uniprot_list(inputs: &str, motif: &str) -> String {
|
|
let mut result = Vec::new();
|
|
for input in inputs.split(" ") {
|
|
let uniprot_id = input.split("_").next().unwrap();
|
|
let positions = find_protein_motif_in_uniprot(uniprot_id, motif);
|
|
if !positions.is_empty() {
|
|
result.push(format!(
|
|
"{}\n{}",
|
|
input,
|
|
positions
|
|
.iter()
|
|
.map(|p| p.to_string())
|
|
.collect::<Vec<String>>()
|
|
.join(" ")
|
|
));
|
|
}
|
|
sleep(Duration::from_millis(10));
|
|
}
|
|
result.join("\n")
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `find_protein_motif` against in-memory sequences.
    #[test]
    fn test_find_protein_motif() {
        // Each case is (protein, motif, expected positions).
        let cases: Vec<(&str, &str, Vec<usize>)> = vec![
            (
                "MRASRPVVHPVEAPPPAALAVAAAAVAVEAGVGAGGGAAAHGGENAQPRGVRMKDPPGAPGTPGGLGLRLVQAFFAAAALAVMASTDDFPSVSAFCYLVAAAILQCLWSLSLAVVDIYALLVKRSLRNPQAVCIFTIGDGITGTLTLGAACASAGITVLIGNDLNICANNHCASFETATAMAFISWFALAPSCVLNFWSMASR",
                "N{P}[ST]{P}",
                vec![],
            ),
            ("GCATGATACATG", "CAT", vec![2, 9]),
            // Overlapping occurrences are all expected to be reported.
            ("AAAAA", "AA", vec![1, 2, 3, 4]),
        ];

        for (protein, motif, expected) in cases {
            assert_eq!(find_protein_motif(protein, motif), expected);
        }
    }

    /// Checks `find_protein_motif_in_uniprot` for several ids, pausing
    /// briefly between consecutive lookups.
    #[test]
    fn test_find_protein_motif_in_uniprot() {
        let motif = "N{P}[ST]{P}";

        // Each case is (lookup id, expected positions).
        let cases: Vec<(&str, Vec<usize>)> = vec![
            ("A2Z669", vec![]),
            ("B5ZC00", vec![85, 118, 142, 306, 395]),
            ("P07204", vec![47, 115, 116, 382, 409]),
            (
                "P20840",
                vec![79, 109, 135, 248, 306, 348, 364, 402, 485, 501, 614],
            ),
        ];

        for (index, (uniprot_id, expected)) in cases.into_iter().enumerate() {
            // Pause between lookups only: none before the first, none after the last.
            if index > 0 {
                sleep(Duration::from_millis(10));
            }
            assert_eq!(find_protein_motif_in_uniprot(uniprot_id, motif), expected);
        }
    }
}
|