This commit is contained in:
Zengtudor 2024-10-19 14:27:14 +08:00
parent 1410ff270a
commit a713db9a4f
4 changed files with 136 additions and 5 deletions

View File

@ -26,7 +26,7 @@ foreach(SRC_CPP IN LISTS SRC_CPPS)
message("found cpp : ${SRC_CPP}") message("found cpp : ${SRC_CPP}")
endforeach() endforeach()
add_executable(${PROJECT_NAME}_bin ${SRC_HPPS} ${SRC_CPPS}) # add_executable(${PROJECT_NAME}_bin ${SRC_HPPS} ${SRC_CPPS})
add_subdirectory(pybind11) add_subdirectory(pybind11)

11
dna.pyi Normal file
View File

@ -0,0 +1,11 @@
# Type stub for the pybind11 binding of the C++ function
#
#   void convert_from_file(
#       std::string source_file=std::string("filteredReads.txt"),
#       std::string destination_file=std::string("reversedSequence.txt"),
#       size_t buffer_size=512*1024*1024
#   )
#
# The keyword names and defaults below mirror the py::arg(...) declarations of
# the extension module: the third argument is exposed as `buffer_size` and is a
# byte count (default 512 MiB), not a size in gigabytes.
#
# source_file:      path of the text file to read.
# destination_file: path of the text file to write (created/overwritten).
# buffer_size:      total I/O buffer budget in bytes; the C++ side gives half
#                   to the reader and half to the writer.
#
# Every second line of the input (2nd, 4th, ...) is replaced by its reverse
# complement; the other lines are copied through unchanged.
# A std::runtime_error thrown by the C++ side (e.g. a file cannot be opened)
# surfaces in Python as RuntimeError through pybind11's exception translation.
def convert_from_file(
    source_file: str = "filteredReads.txt",
    destination_file: str = "reversedSequence.txt",
    buffer_size: int = 512 * 1024 * 1024,
) -> None: ...

View File

@ -1 +1,37 @@
#pragma once
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <chrono>
/// Concatenates all arguments into one string by streaming each of them, in
/// order, into a std::ostringstream with operator<<.
///
/// @param args  Values to stream; each must be insertable into a std::ostream.
/// @return      The concatenated text (an empty string when called with no
///              arguments).
template<class ...Args>
std::string fmt(Args&&...args){
    std::ostringstream oss;
    (oss<<...<<std::forward<Args>(args));
    // oss.str() is already a temporary: return it directly instead of copying
    // it into another std::string and wrapping that in std::move.
    return oss.str();
}
/// Streams all arguments, in order, into a temporary std::ostringstream and
/// then writes the assembled text to std::cout with a single insertion.
///
/// @param args  Values to print; each must be insertable into a std::ostream.
///              They are taken by forwarding reference so that nothing is
///              copied just to be printed.
template<class ...Args>
void print(Args&&...args){
    std::ostringstream oss;
    (oss<<...<<std::forward<Args>(args));
    std::cout<<oss.str();
}
// NVC(v) expands to three comma-separated arguments: the source text of the
// expression `v` as a string literal, the literal " : ", and the value of `v`.
// It is meant to be spliced into the argument list of print()/fmt(), e.g.
//   print(NVC(count),'\n');   // prints "count : <value of count>"
// Because the expansion contains top-level commas, it is only usable where an
// argument list is expected.
#define NVC(v)#v," : ",(v)
/// Scope timer: prints a "Start timing" line when constructed and, when
/// destroyed, prints the time elapsed since construction in milliseconds.
/// Both lines are tagged with the name given to the constructor.
class Spent{
private:
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by adjustments of the system (wall) clock while the timer is running.
    const std::chrono::steady_clock::time_point start_;
    const std::string name_;
public:
    /// @param name  Label shown in the start/stop messages. Taken by value and
    ///              moved into the object, so the caller's copy is the only one.
    Spent(std::string name)noexcept:start_(std::chrono::steady_clock::now()),name_(std::move(name)){
        try{
            print("[Timer: ",name_,"]"," Start timing","\n");
        }catch(...){
            // Reporting is best effort: a failure while formatting or writing
            // the message must not escape a noexcept constructor.
        }
    }
    ~Spent()noexcept{
        const auto end = std::chrono::steady_clock::now();
        const auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(end-start_);
        try{
            print("[Timer: ",name_,"]"," Stop timing , used ", dur.count(),"ms\n");
        }catch(...){
            // Destructors must not throw; losing the report is preferable to
            // terminating the program.
        }
    }
};

View File

@ -1,8 +1,92 @@
#include "dna.hpp"

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstring>
#include <fstream>
#include <iostream>
#include <istream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

#include <pybind11/cast.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
namespace py = pybind11;
// Default value of convert_from_file's `buffer_size` argument, in bytes
// (512 * 1024 * 1024 = 512 MiB).
static constexpr size_t default_buffer_size = size_t{512} * 1024 * 1024;
// Builds the base-complement lookup table: every byte maps to itself except
// A<->T and C<->G, whose lower-case forms map to the upper-case complement.
static constexpr std::array<char, 256> make_complement_table()
{
    std::array<char, 256> table{};
    for (std::size_t i = 0; i < table.size(); ++i) {
        table[i] = static_cast<char>(i);
    }
    table['A'] = 'T'; table['a'] = 'T';
    table['T'] = 'A'; table['t'] = 'A';
    table['C'] = 'G'; table['c'] = 'G';
    table['G'] = 'C'; table['g'] = 'C';
    return table;
}

// A flat array indexed by the byte value is a real table lookup: one load per
// base, with no hashing and no data-dependent branch in the hot loop.
static constexpr std::array<char, 256> complement_table = make_complement_table();

/// Replaces the sequence in [begin, end) by its reverse complement, in place.
///
/// The range is first reversed, then every base is replaced by its complement
/// (A<->T, C<->G; lower-case input produces upper-case output). Characters
/// that are not one of these bases are left unchanged.
///
/// @param begin  First character of the sequence.
/// @param end    One past the last character; `end` itself is never accessed.
void reverseComplement(char *begin, char *end)
{
    std::reverse(begin, end);
    std::transform(begin, end, begin, [](char base) {
        // Go through unsigned char so bytes >= 0x80 cannot produce a negative index.
        return complement_table[static_cast<unsigned char>(base)];
    });
}
void convert_from_file(
std::string source_file=std::string("filteredReads.txt"),
std::string destination_file=std::string("reversedSequence.txt"),
size_t buffer_size=default_buffer_size
)
{
// std::iostream::sync_with_stdio(false);
const size_t max_size_pre_dna{(size_t)5e4+5};
const size_t all_buf_size = {buffer_size};
const size_t read_bufsize{all_buf_size/2},write_bufsize{all_buf_size/2};
print(NVC(max_size_pre_dna),'\n',
NVC(all_buf_size),'\n',
NVC(read_bufsize),'\n',
NVC(write_bufsize),'\n');
std::ifstream ifs(source_file);
if(ifs.is_open()==false)throw std::runtime_error(fmt("Cannot open input file stream\nfilename: ",source_file,'\n'));
std::ofstream ofs(destination_file);
if(ofs.is_open()==false)throw std::runtime_error(fmt("Cannot open output file stream\nfilename: ",destination_file,'\n'));
std::cout<<"Open file ok ,getting memory\n";
std::vector<char> read_buf(read_bufsize), write_buf(write_bufsize);
ifs.rdbuf()->pubsetbuf(read_buf.data(), read_buf.size());
ofs.rdbuf()->pubsetbuf(write_buf.data(), write_buf.size());
std::array<char, max_size_pre_dna> dna_buf;
bool is_dna_line{false};
std::cout<<"computing\n";
Spent all_spent("all_spent_time");
while(ifs.getline(dna_buf.data(),dna_buf.size())){
const size_t new_buflen{strlen(dna_buf.data())};
if(is_dna_line){
reverseComplement(dna_buf.data(), dna_buf.data()+new_buflen);
// std::cout<<"complete one ok\n";
}
// print(NVC(new_buflen),'\n');
// print(NVC(dna_buf.data()));
dna_buf[new_buflen]='\n';
ofs.write(dna_buf.data(),new_buflen+1);
is_dna_line=!is_dna_line;
}
ofs.flush();
print("done\n");
}
// Defines the Python extension module `dna` and exposes convert_from_file with
// keyword arguments whose names and defaults match the C++ declaration.
PYBIND11_MODULE(dna, m) {
    using namespace py::literals; // "name"_a is shorthand for py::arg("name")

    m.doc() = "A dna base conversion library"; // module docstring
    m.def(
        "convert_from_file",
        &convert_from_file,
        "dna base switching from file.",
        "source_file"_a = std::string("filteredReads.txt"),
        "destination_file"_a = std::string("reversedSequence.txt"),
        "buffer_size"_a = default_buffer_size
    );
}