From c637a639532965d149c4121175f00218886ea088 Mon Sep 17 00:00:00 2001 From: demian3b Date: Wed, 15 Apr 2026 23:48:51 +0900 Subject: [PATCH] Update project configuration and documentation for GPU support; add GPU environment check script and CUDA runtime validation --- .gitignore | 2 ++ Cargo.toml | 8 ++------ README.md | 16 +++++++++++++++- examples/toy_pipeline.rs | 15 +-------------- scripts/gpu_env_check.sh | 20 ++++++++++++++++++++ scripts/gpu_smoke_test.py | 23 +++++++++++++++++++++++ src/api.rs | 1 - src/burn_adapter.rs | 7 ------- 8 files changed, 63 insertions(+), 29 deletions(-) create mode 100755 scripts/gpu_env_check.sh create mode 100644 scripts/gpu_smoke_test.py diff --git a/.gitignore b/.gitignore index 5730393..18b22ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +outputs + # ---> Rust # Generated by Cargo # will have compiled files and executables diff --git a/Cargo.toml b/Cargo.toml index d861ee7..d3672cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,17 +3,13 @@ name = "riemann-flow-gnn" version = "0.1.0" edition = "2021" -[features] -default = [] -burn = ["dep:burn", "dep:safetensors"] - [dependencies] plotters = { version = "0.3", default-features = false, features = ["bitmap_backend", "bitmap_encoder"] } serde = { version = "1", features = ["derive"] } serde_json = "1" thiserror = "1" -burn = { version = "0.14", optional = true, features = ["autodiff", "ndarray"] } -safetensors = { version = "0.4", optional = true } +burn = { version = "0.14", features = ["autodiff", "cuda-jit"] } +safetensors = { version = "0.4" } [dev-dependencies] approx = "0.5" diff --git a/README.md b/README.md index 58b432d..b6c77eb 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,20 @@ Riemannian Flow matching with GNN implementation in rust/burn cargo run --example toy_pipeline ``` +### NVIDIA GPU 환경 점검 + +```bash +bash scripts/gpu_env_check.sh +``` + +점검 항목: +- `nvidia-smi` 드라이버/디바이스 인식 +- `nvcc --version` CUDA toolkit 인식 +- `scripts/gpu_smoke_test.py` CUDA matmul 런타임 실행 +- `cargo check --example toy_pipeline` Burn 빌드 스모크 테스트 + +예제 학습 스켈레톤은 **기본으로 Burn CUDA JIT 백엔드(GPU)** 를 사용합니다. NVIDIA 드라이버와 CUDA 개발 환경(`nvcc` 등)이 필요합니다. + 예제는 파이프라인 계약 확인용으로 동작합니다. ### 현재 포함 범위 @@ -20,7 +34,7 @@ cargo run --example toy_pipeline - graph/domain struct (`Atom`, `Bond`, `TorsionEdge`, `LigandGraph`) - one-sample entry API (`api::OneSampleToyPipeline`) - graph/pose/simulator 최소 계약 -- Burn tensor shape 중심 adapter skeleton (`--features burn`) +- Burn tensor shape 중심 adapter skeleton (기본 활성) - TODO 기반 구현 포인트 주석 ### 이후 단계 권장 diff --git a/examples/toy_pipeline.rs b/examples/toy_pipeline.rs index f1e65e2..16a8c31 100644 --- a/examples/toy_pipeline.rs +++ b/examples/toy_pipeline.rs @@ -1,8 +1,6 @@ -#[cfg(feature = "burn")] use std::{collections::BTreeMap, marker::PhantomData, path::Path}; use std::fs; -#[cfg(feature = "burn")] use burn::{backend::Autodiff, tensor::backend::AutodiffBackend}; use riemann_flow_gnn::{ api::{OneSampleToyBatch, OneSampleToyPipeline}, @@ -12,18 +10,15 @@ use riemann_flow_gnn::{ viz::{export_simulation_video, plot_static_png}, }; use serde::Deserialize; -#[cfg(feature = "burn")] use riemann_flow_gnn::api::burn_training::{ run_burn_training, BurnToyModel, BurnTrainConfig, }; -#[cfg(feature = "burn")] use safetensors::{ serialize_to_file, tensor::{Dtype, TensorView}, }; -#[cfg(feature = "burn")] -type ExampleBackend = Autodiff; +type ExampleBackend = Autodiff; #[derive(Debug, Deserialize)] struct ToyAtom { @@ -100,12 +95,10 @@ fn load_toy_reference(path: &str) -> Result<(LigandGraph, LigandConformer), Box< Ok((graph, reference)) } -#[cfg(feature = "burn")] struct ExampleBurnModel { _backend: PhantomData, } -#[cfg(feature = "burn")] impl ExampleBurnModel { fn forward_template(&self, _graph: &riemann_flow_gnn::graph::GraphFeatures) -> Result<(), String> { // TODO(user): Burn tensor 입력(GraphTensors, PoseTensors) 기반 forward 구현 @@ -123,7 +116,6 @@ impl ExampleBurnModel { } } -#[cfg(feature = "burn")] impl BurnToyModel for ExampleBurnModel { fn name(&self) -> &'static str { "burn-toy-model" @@ -196,7 +188,6 @@ fn main() -> Result<(), Box> { }); let graph_features = pipeline.graph_contract(); - #[cfg(feature = "burn")] let history = { let device = Default::default(); let mut model = ExampleBurnModel::::new(&device); @@ -233,16 +224,12 @@ fn main() -> Result<(), Box> { graph_features.spec.node_feat_dim, graph_features.spec.edge_feat_dim ); - #[cfg(feature = "burn")] println!( "burn training finished: epochs={}, last_loss={:.6}", history.len(), history.last().map(|m| m.loss).unwrap_or(0.0) ); - #[cfg(feature = "burn")] println!("saved checkpoints: outputs/checkpoints/*.safetensors"); - #[cfg(not(feature = "burn"))] - println!("burn feature is off: training skeleton skipped"); println!("saved video: outputs/simulation.mp4"); println!("target reference: data/toy/aspirin_reference.json"); Ok(()) diff --git a/scripts/gpu_env_check.sh b/scripts/gpu_env_check.sh new file mode 100755 index 0000000..b6bdab4 --- /dev/null +++ b/scripts/gpu_env_check.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "[1/4] NVIDIA driver check" +nvidia-smi + +echo +echo "[2/4] CUDA compiler check" +nvcc --version + +echo +echo "[3/4] PyTorch CUDA runtime check" +python "scripts/gpu_smoke_test.py" + +echo +echo "[4/4] Burn feature compile check" +cargo check --example toy_pipeline + +echo +echo "GPU environment check finished successfully." diff --git a/scripts/gpu_smoke_test.py b/scripts/gpu_smoke_test.py new file mode 100644 index 0000000..396fe74 --- /dev/null +++ b/scripts/gpu_smoke_test.py @@ -0,0 +1,23 @@ +import torch + + +def main() -> None: + print(f"torch={torch.__version__}") + print(f"cuda_available={torch.cuda.is_available()}") + if not torch.cuda.is_available(): + raise SystemExit("CUDA is not available") + + device_count = torch.cuda.device_count() + print(f"cuda_device_count={device_count}") + for i in range(device_count): + print(f"gpu[{i}]={torch.cuda.get_device_name(i)}") + + x = torch.randn((2048, 2048), device="cuda") + y = torch.randn((2048, 2048), device="cuda") + z = x @ y + torch.cuda.synchronize() + print(f"matmul_ok shape={tuple(z.shape)} dtype={z.dtype} device={z.device}") + + +if __name__ == "__main__": + main() diff --git a/src/api.rs b/src/api.rs index 2cd8bb0..97ce86b 100644 --- a/src/api.rs +++ b/src/api.rs @@ -46,7 +46,6 @@ impl OneSampleToyPipeline { } } -#[cfg(feature = "burn")] pub mod burn_training { use std::{ collections::BTreeMap, diff --git a/src/burn_adapter.rs b/src/burn_adapter.rs index 7f7e02d..938939c 100644 --- a/src/burn_adapter.rs +++ b/src/burn_adapter.rs @@ -1,12 +1,8 @@ -#[cfg(feature = "burn")] use burn::tensor::{backend::Backend, Int, Tensor}; -#[cfg(feature = "burn")] use crate::geometry::PoseState; -#[cfg(feature = "burn")] use crate::graph::GraphFeatures; -#[cfg(feature = "burn")] pub struct GraphTensors { pub node_features: Tensor, pub edge_index: Tensor, @@ -14,7 +10,6 @@ pub struct GraphTensors { pub torsion_edge_index: Tensor, } -#[cfg(feature = "burn")] impl GraphTensors { pub fn from_features(features: &GraphFeatures, device: &B::Device) -> Self { Self { @@ -35,14 +30,12 @@ impl GraphTensors { } } -#[cfg(feature = "burn")] pub struct PoseTensors { pub translation: Tensor, pub rotation: Tensor, pub torsions: Tensor, } -#[cfg(feature = "burn")] impl PoseTensors { pub fn from_pose_batch(poses: &[PoseState], device: &B::Device) -> Self { let batch = poses.len();