diff --git a/oxbow/src/fastq.rs b/oxbow/src/fastq.rs index 92520c4..516510d 100644 --- a/oxbow/src/fastq.rs +++ b/oxbow/src/fastq.rs @@ -1,35 +1,31 @@ use arrow::array::{ArrayRef, GenericStringBuilder}; use arrow::{error::ArrowError, record_batch::RecordBatch}; +// use noodles::core::Region; use noodles::fastq; -use std::{ - fs::File, - io::{self, BufReader, Read, Seek}, - str, - sync::Arc, -}; +// use noodles::fastq::fai; +use std::sync::Arc; use crate::batch_builder::{write_ipc, BatchBuilder}; -/// A FASTQ reader. -pub struct FastqReader { - reader: fastq::Reader>, -} +type BufferedReader = std::io::BufReader; -impl FastqReader> { - /// Creates a Fasta reader from a given file path. - pub fn new_from_path(path: &str) -> io::Result { - let reader = File::open(path) - .map(BufReader::new) - .map(BufReader::new) - .map(fastq::Reader::new)?; - Ok(Self { reader }) - } +/// A FASTQ reader. +pub struct FastqReader { + reader: fastq::Reader, + // index: fai::Reader>, } -impl FastqReader { +impl FastqReader { /// Creates a Fastq Reader. - pub fn new(read: R) -> io::Result { - let reader = fastq::Reader::new(BufReader::new(read)); + pub fn new(path: &str) -> std::io::Result { + // let index_file = std::fs::File::open(format!("{}.fai", path))?; + // let index_bufreader = std::io::BufReader::with_capacity(1024 * 1024, index_file); + // let index = fai::Reader::new(index_bufreader); + + let file = std::fs::File::open(path)?; + let bufreader = std::io::BufReader::with_capacity(1024 * 1024, file); + let reader = fastq::reader::Reader::new(bufreader); + Ok(Self { reader }) } @@ -75,13 +71,13 @@ impl BatchBuilder for FastqBatchBuilder { fn push(&mut self, record: Self::Record<'_>) { self.name - .append_value(str::from_utf8(record.name()).unwrap()); + .append_value(std::str::from_utf8(record.name()).unwrap()); self.description - .append_value(str::from_utf8(record.description()).unwrap()); + .append_value(std::str::from_utf8(record.description()).unwrap()); self.sequence - .append_value(str::from_utf8(record.sequence()).unwrap()); + .append_value(std::str::from_utf8(record.sequence()).unwrap()); self.quality_scores - .append_value(str::from_utf8(record.quality_scores()).unwrap()); + .append_value(std::str::from_utf8(record.quality_scores()).unwrap()); } fn finish(mut self) -> Result { diff --git a/py-oxbow/src/lib.rs b/py-oxbow/src/lib.rs index 18f5446..f000b18 100644 --- a/py-oxbow/src/lib.rs +++ b/py-oxbow/src/lib.rs @@ -39,22 +39,10 @@ fn read_fasta(path: &str, region: Option<&str>) -> PyObject { } #[pyfunction] -fn read_fastq(py: Python, path_or_file_like: PyObject) -> PyObject { - if let Ok(string_ref) = path_or_file_like.downcast::(py) { - // If it's a string, treat it like a path - let mut reader = FastqReader::new_from_path(string_ref.to_str().unwrap()).unwrap(); - let ipc = reader.records_to_ipc().unwrap(); - Python::with_gil(|py| PyBytes::new(py, &ipc).into()) - } else { - // Otherwise, treat it as file-like - let file_like = match PyFileLikeObject::new(path_or_file_like, true, false, true) { - Ok(file_like) => file_like, - Err(_) => panic!("Unknown argument for `path_url_or_file_like`. Not a file path string or url, and not a file-like object."), - }; - let mut reader = FastqReader::new(file_like).unwrap(); - let ipc = reader.records_to_ipc().unwrap(); - Python::with_gil(|py| PyBytes::new(py, &ipc).into()) - } +fn read_fastq(path: &str) -> PyObject { + let mut reader = FastqReader::new(path).unwrap(); + let ipc = reader.records_to_ipc().unwrap(); + Python::with_gil(|py| PyBytes::new(py, &ipc).into()) } #[pyfunction]