update
This commit is contained in:
parent
1410ff270a
commit
a713db9a4f
@ -26,7 +26,7 @@ foreach(SRC_CPP IN LISTS SRC_CPPS)
|
|||||||
message("found cpp : ${SRC_CPP}")
|
message("found cpp : ${SRC_CPP}")
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
add_executable(${PROJECT_NAME}_bin ${SRC_HPPS} ${SRC_CPPS})
|
# add_executable(${PROJECT_NAME}_bin ${SRC_HPPS} ${SRC_CPPS})
|
||||||
|
|
||||||
add_subdirectory(pybind11)
|
add_subdirectory(pybind11)
|
||||||
|
|
||||||
|
11
dna.pyi
Normal file
11
dna.pyi
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# C++ signature exposed through pybind11 (see src/main.cpp):
# void convert_from_file(
#     std::string source_file=std::string("filteredReads.txt"),
#     std::string destination_file=std::string("reversedSequence.txt"),
#     size_t buffer_size=512*1024*1024
# )

def convert_from_file(
    source_file: str = "filteredReads.txt",
    destination_file: str = "reversedSequence.txt",
    buffer_size: int = 536870912,
) -> None:
    """Copy ``source_file`` to ``destination_file``, reverse-complementing every second line.

    Lines are processed in pairs: the first line of each pair is copied
    unchanged, the second is reversed and its bases are complemented
    (A<->T, C<->G; lower-case input yields upper-case output).

    :param source_file: path of the input text file.
    :param destination_file: path of the output text file.
    :param buffer_size: total I/O buffer budget in bytes (default 512 MiB);
        the native code gives half to reading and half to writing.
    :raises RuntimeError: when the native code throws ``std::runtime_error``,
        e.g. because a file cannot be opened.
    """
    ...
|
38
src/dna.hpp
38
src/dna.hpp
@ -1 +1,37 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
/// Builds a string by streaming every argument, in order, through operator<<.
///
/// @param args  Any values insertable into a std::ostream; they are perfectly
///              forwarded, so no argument is copied.
/// @return      The concatenated text (empty when called without arguments).
template<class ...Args>
std::string fmt(Args&&...args){
    std::ostringstream oss;
    (oss<<...<<std::forward<Args>(args));
    // Return the prvalue directly: wrapping it in std::string(...) plus
    // std::move only added a copy and disabled copy elision.
    return oss.str();
}
|
||||||
|
|
||||||
|
/// Streams every argument, in order, into a temporary buffer and then writes
/// the assembled text to std::cout with a single insertion.
///
/// @param args  Any values insertable into a std::ostream. They are taken as
///              forwarding references so std::forward is meaningful and
///              large arguments (e.g. std::string) are not copied per call.
template<class ...Args>
void print(Args&&...args){
    std::ostringstream oss;
    (oss<<...<<std::forward<Args>(args));
    std::cout<<oss.str();
}
|
||||||
|
|
||||||
|
// Expands to three comma-separated arguments: the expression spelled out as a
// string literal, the separator " : ", and the expression's value. Meant to be
// dropped into a variadic call such as print(NVC(x), '\n').
#define NVC(v)#v," : ",(v)
|
||||||
|
|
||||||
|
class Spent{ // 使用RAII原理的自动计时器,计算主函数运行时间,析构时自动输出
|
||||||
|
private:
|
||||||
|
const decltype(std::chrono::system_clock::now()) start;
|
||||||
|
const std::string name;
|
||||||
|
public:
|
||||||
|
Spent(const std::string name)noexcept:start(std::chrono::system_clock::now()),name(name){
|
||||||
|
print("[Timer: ",name,"]"," Start timing","\n");
|
||||||
|
}
|
||||||
|
~Spent()noexcept{
|
||||||
|
const auto end = std::chrono::system_clock::now();
|
||||||
|
const auto dur = std::chrono::duration_cast<std::chrono::milliseconds> (end-start);
|
||||||
|
print("[Timer: ",name,"]"," Stop timing , used ", dur.count(),"ms\n");
|
||||||
|
}
|
||||||
|
};
|
90
src/main.cpp
90
src/main.cpp
@ -1,8 +1,92 @@
|
|||||||
#include "dna.hpp"
|
#include "dna.hpp"
|
||||||
|
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstring>
#include <fstream>
#include <iostream>
#include <istream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

#include <pybind11/cast.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
|
||||||
|
|
||||||
int main(){
|
namespace py = pybind11;
|
||||||
|
// Default total I/O buffer budget in bytes (512 MiB). Used as the default for
// convert_from_file's buffer_size argument, both in C++ and in the Python binding.
static constexpr size_t default_buffer_size{512*1024*1024};
|
||||||
|
|
||||||
|
namespace {

/// Builds the base-complement table at compile time.
/// Every byte maps to itself except A/a -> T, T/t -> A, C/c -> G, G/g -> C
/// (lower-case input is upper-cased in the result).
constexpr std::array<char, 256> make_complement_table()
{
    std::array<char, 256> table{};
    for (std::size_t i = 0; i < table.size(); ++i) {
        table[i] = static_cast<char>(i);
    }
    table[static_cast<unsigned char>('A')] = 'T';
    table[static_cast<unsigned char>('a')] = 'T';
    table[static_cast<unsigned char>('T')] = 'A';
    table[static_cast<unsigned char>('t')] = 'A';
    table[static_cast<unsigned char>('C')] = 'G';
    table[static_cast<unsigned char>('c')] = 'G';
    table[static_cast<unsigned char>('G')] = 'C';
    table[static_cast<unsigned char>('g')] = 'C';
    return table;
}

} // namespace

// Direct-indexed lookup table: one array read per base, with no hashing and
// no data-dependent branch. Characters that are not A/T/C/G map to themselves.
static constexpr std::array<char, 256> complement = make_complement_table();

/// Replaces the sequence in [begin, end) with its reverse complement, in place.
///
/// @param begin  First character of the sequence.
/// @param end    One past the last character; never dereferenced.
///
/// Characters other than A/T/C/G (either case) are kept as they are; they are
/// only moved by the reversal.
void reverseComplement(char *begin, char *end)
{
    std::reverse(begin, end); // reverse the sequence first

    for (char *base = begin; base < end; ++base) {
        // Index through unsigned char so bytes >= 0x80 never become negative indices.
        *base = complement[static_cast<unsigned char>(*base)];
    }
}
|
||||||
|
|
||||||
|
void convert_from_file(
|
||||||
|
std::string source_file=std::string("filteredReads.txt"),
|
||||||
|
std::string destination_file=std::string("reversedSequence.txt"),
|
||||||
|
size_t buffer_size=default_buffer_size
|
||||||
|
)
|
||||||
|
{
|
||||||
|
// std::iostream::sync_with_stdio(false);
|
||||||
|
|
||||||
|
const size_t max_size_pre_dna{(size_t)5e4+5};
|
||||||
|
const size_t all_buf_size = {buffer_size};
|
||||||
|
const size_t read_bufsize{all_buf_size/2},write_bufsize{all_buf_size/2};
|
||||||
|
|
||||||
|
print(NVC(max_size_pre_dna),'\n',
|
||||||
|
NVC(all_buf_size),'\n',
|
||||||
|
NVC(read_bufsize),'\n',
|
||||||
|
NVC(write_bufsize),'\n');
|
||||||
|
std::ifstream ifs(source_file);
|
||||||
|
if(ifs.is_open()==false)throw std::runtime_error(fmt("Cannot open input file stream\nfilename: ",source_file,'\n'));
|
||||||
|
|
||||||
|
std::ofstream ofs(destination_file);
|
||||||
|
if(ofs.is_open()==false)throw std::runtime_error(fmt("Cannot open output file stream\nfilename: ",destination_file,'\n'));
|
||||||
|
std::cout<<"Open file ok ,getting memory\n";
|
||||||
|
std::vector<char> read_buf(read_bufsize), write_buf(write_bufsize);
|
||||||
|
ifs.rdbuf()->pubsetbuf(read_buf.data(), read_buf.size());
|
||||||
|
ofs.rdbuf()->pubsetbuf(write_buf.data(), write_buf.size());
|
||||||
|
|
||||||
|
std::array<char, max_size_pre_dna> dna_buf;
|
||||||
|
bool is_dna_line{false};
|
||||||
|
std::cout<<"computing\n";
|
||||||
|
Spent all_spent("all_spent_time");
|
||||||
|
while(ifs.getline(dna_buf.data(),dna_buf.size())){
|
||||||
|
const size_t new_buflen{strlen(dna_buf.data())};
|
||||||
|
if(is_dna_line){
|
||||||
|
reverseComplement(dna_buf.data(), dna_buf.data()+new_buflen);
|
||||||
|
// std::cout<<"complete one ok\n";
|
||||||
|
}
|
||||||
|
// print(NVC(new_buflen),'\n');
|
||||||
|
// print(NVC(dna_buf.data()));
|
||||||
|
dna_buf[new_buflen]='\n';
|
||||||
|
ofs.write(dna_buf.data(),new_buflen+1);
|
||||||
|
is_dna_line=!is_dna_line;
|
||||||
|
}
|
||||||
|
ofs.flush();
|
||||||
|
print("done\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Python extension entry point: defines module `dna` and exposes
// convert_from_file with keyword arguments and the same defaults as in C++.
PYBIND11_MODULE(dna, m) {
    using namespace pybind11::literals; // "name"_a is shorthand for py::arg("name")

    m.doc() = "A dna base conversion library"; // optional module docstring

    const std::string default_source{"filteredReads.txt"};
    const std::string default_destination{"reversedSequence.txt"};

    m.def(
        "convert_from_file",
        &convert_from_file,
        "dna base switching from file.",
        "source_file"_a = default_source,
        "destination_file"_a = default_destination,
        "buffer_size"_a = default_buffer_size);
}
|
Loading…
Reference in New Issue
Block a user