update
This commit is contained in:
parent
1410ff270a
commit
a713db9a4f
@ -26,7 +26,7 @@ foreach(SRC_CPP IN LISTS SRC_CPPS)
|
||||
message("found cpp : ${SRC_CPP}")
|
||||
endforeach()
|
||||
|
||||
add_executable(${PROJECT_NAME}_bin ${SRC_HPPS} ${SRC_CPPS})
|
||||
# add_executable(${PROJECT_NAME}_bin ${SRC_HPPS} ${SRC_CPPS})
|
||||
|
||||
add_subdirectory(pybind11)
|
||||
|
||||
|
11
dna.pyi
Normal file
11
dna.pyi
Normal file
@ -0,0 +1,11 @@
|
||||
# Mirrors the C++ function exported through pybind11 in src/main.cpp:
#
# void convert_from_file(
#     std::string source_file=std::string("filteredReads.txt"),
#     std::string destination_file=std::string("reversedSequence.txt"),
#     size_t buffer_size=default_buffer_size   // 512*1024*1024 bytes
# )

def convert_from_file(
    source_file: str = "filteredReads.txt",
    destination_file: str = "reversedSequence.txt",
    buffer_size: int = 512 * 1024 * 1024,
) -> None:
    """Copy ``source_file`` to ``destination_file``, reverse-complementing every second line.

    Lines are processed in pairs: the 1st, 3rd, ... lines are copied unchanged,
    the 2nd, 4th, ... lines are reversed and have their bases complemented.

    Args:
        source_file: Path of the input text file.
        destination_file: Path of the output text file.
        buffer_size: Total size in bytes of the I/O buffers; half is used for
            reading and half for writing. The keyword name and default match
            the ``py::arg("buffer_size")`` registration of the extension module.

    Raises:
        RuntimeError: If the input or output file cannot be opened (the C++
            side throws ``std::runtime_error``).
    """
    ...
|
36
src/dna.hpp
36
src/dna.hpp
@ -1 +1,37 @@
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <chrono>
|
||||
|
||||
/// Concatenates all arguments into one string by streaming each of them, in
/// order, into a std::ostringstream with operator<<.
///
/// @param args Values to format; each must be insertable into a std::ostream.
/// @return The concatenated text (empty when called without arguments).
template<class ...Args>
std::string fmt(Args&&...args){
    std::ostringstream oss;
    (oss<<...<<std::forward<Args>(args));
    // str() already yields a temporary; wrapping it in another std::string and
    // std::move only adds work and defeats copy elision.
    return oss.str();
}
|
||||
|
||||
/// Streams all arguments, in order, into a temporary buffer and then writes
/// the assembled text to std::cout with a single insertion.
///
/// @param args Values to print; each must be insertable into a std::ostream.
template<class ...Args>
void print(Args&&...args){
    // Forwarding references: the arguments are streamed in place instead of
    // being copied into by-value parameters first (std::forward is only
    // meaningful with deduced Args&&).
    std::ostringstream oss;
    (oss<<...<<std::forward<Args>(args));
    std::cout<<oss.str();
}
|
||||
|
||||
// "Name-Value" helper for print()/fmt(): expands to three comma-separated
// arguments - the stringified expression, the literal " : ", and the
// parenthesized expression itself - e.g. print(NVC(x)) becomes
// print("x", " : ", (x)).
#define NVC(v)#v," : ",(v)
|
||||
|
||||
class Spent{ // 使用RAII原理的自动计时器,计算主函数运行时间,析构时自动输出
|
||||
private:
|
||||
const decltype(std::chrono::system_clock::now()) start;
|
||||
const std::string name;
|
||||
public:
|
||||
Spent(const std::string name)noexcept:start(std::chrono::system_clock::now()),name(name){
|
||||
print("[Timer: ",name,"]"," Start timing","\n");
|
||||
}
|
||||
~Spent()noexcept{
|
||||
const auto end = std::chrono::system_clock::now();
|
||||
const auto dur = std::chrono::duration_cast<std::chrono::milliseconds> (end-start);
|
||||
print("[Timer: ",name,"]"," Stop timing , used ", dur.count(),"ms\n");
|
||||
}
|
||||
};
|
88
src/main.cpp
88
src/main.cpp
@ -1,8 +1,92 @@
|
||||
#include "dna.hpp"
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include
|
||||
#include <istream>
|
||||
#include <pybind11/cast.h>
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/stl.h>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
int main(){
|
||||
// Short alias for the pybind11 namespace, used by the module definition below.
namespace py = pybind11;
// Default total I/O buffer size in bytes (512 MiB); it is the default value of
// convert_from_file's buffer_size parameter and of the matching Python keyword.
static constexpr size_t default_buffer_size{512*1024*1024};
|
||||
|
||||
/// Builds the base-complement lookup table at compile time.
///
/// The table is indexed by the unsigned value of a character. A/a -> T,
/// T/t -> A, C/c -> G, G/g -> C (the result is always upper case); every other
/// character maps to itself.
static constexpr std::array<char, 256> make_complement_table()
{
    std::array<char, 256> table{};
    for (std::size_t i = 0; i < table.size(); ++i) {
        table[i] = static_cast<char>(static_cast<unsigned char>(i));
    }
    table[static_cast<unsigned char>('A')] = 'T';
    table[static_cast<unsigned char>('a')] = 'T';
    table[static_cast<unsigned char>('T')] = 'A';
    table[static_cast<unsigned char>('t')] = 'A';
    table[static_cast<unsigned char>('C')] = 'G';
    table[static_cast<unsigned char>('c')] = 'G';
    table[static_cast<unsigned char>('G')] = 'C';
    table[static_cast<unsigned char>('g')] = 'C';
    return table;
}

// Direct-indexed table: one array load per base, with no hashing and no
// branching, and no dynamic initialization at program start.
static constexpr std::array<char, 256> complement = make_complement_table();

/// Reverse-complements a DNA sequence in place.
///
/// @param begin Pointer to the first character of the sequence.
/// @param end   One past the last character (half-open range; never read).
///
/// The sequence is reversed and each base is replaced through `complement`;
/// characters that are not A/C/G/T (in either case) are kept as they are.
void reverseComplement(char *begin, char *end)
{
    // Walk inwards from both ends, complementing and swapping in one pass.
    // For an odd length the last iteration has begin == end, which simply
    // complements the middle character.
    while (begin < end) {
        --end;
        const char head = complement[static_cast<unsigned char>(*begin)];
        const char tail = complement[static_cast<unsigned char>(*end)];
        *begin = tail;
        *end = head;
        ++begin;
    }
}
|
||||
|
||||
void convert_from_file(
|
||||
std::string source_file=std::string("filteredReads.txt"),
|
||||
std::string destination_file=std::string("reversedSequence.txt"),
|
||||
size_t buffer_size=default_buffer_size
|
||||
)
|
||||
{
|
||||
// std::iostream::sync_with_stdio(false);
|
||||
|
||||
const size_t max_size_pre_dna{(size_t)5e4+5};
|
||||
const size_t all_buf_size = {buffer_size};
|
||||
const size_t read_bufsize{all_buf_size/2},write_bufsize{all_buf_size/2};
|
||||
|
||||
print(NVC(max_size_pre_dna),'\n',
|
||||
NVC(all_buf_size),'\n',
|
||||
NVC(read_bufsize),'\n',
|
||||
NVC(write_bufsize),'\n');
|
||||
std::ifstream ifs(source_file);
|
||||
if(ifs.is_open()==false)throw std::runtime_error(fmt("Cannot open input file stream\nfilename: ",source_file,'\n'));
|
||||
|
||||
std::ofstream ofs(destination_file);
|
||||
if(ofs.is_open()==false)throw std::runtime_error(fmt("Cannot open output file stream\nfilename: ",destination_file,'\n'));
|
||||
std::cout<<"Open file ok ,getting memory\n";
|
||||
std::vector<char> read_buf(read_bufsize), write_buf(write_bufsize);
|
||||
ifs.rdbuf()->pubsetbuf(read_buf.data(), read_buf.size());
|
||||
ofs.rdbuf()->pubsetbuf(write_buf.data(), write_buf.size());
|
||||
|
||||
std::array<char, max_size_pre_dna> dna_buf;
|
||||
bool is_dna_line{false};
|
||||
std::cout<<"computing\n";
|
||||
Spent all_spent("all_spent_time");
|
||||
while(ifs.getline(dna_buf.data(),dna_buf.size())){
|
||||
const size_t new_buflen{strlen(dna_buf.data())};
|
||||
if(is_dna_line){
|
||||
reverseComplement(dna_buf.data(), dna_buf.data()+new_buflen);
|
||||
// std::cout<<"complete one ok\n";
|
||||
}
|
||||
// print(NVC(new_buflen),'\n');
|
||||
// print(NVC(dna_buf.data()));
|
||||
dna_buf[new_buflen]='\n';
|
||||
ofs.write(dna_buf.data(),new_buflen+1);
|
||||
is_dna_line=!is_dna_line;
|
||||
}
|
||||
ofs.flush();
|
||||
print("done\n");
|
||||
}
|
||||
|
||||
// Defines the Python extension module named `dna` and exposes
// convert_from_file() with keyword arguments whose defaults repeat the C++
// default arguments.
PYBIND11_MODULE(dna, m) {
    m.doc() = "A dna base conversion library"; // module-level docstring shown in Python

    // Keyword names registered here are the names Python callers must use.
    m.def("convert_from_file", &convert_from_file, "dna base switching from file.",
        py::arg("source_file")= std::string("filteredReads.txt"),
        py::arg("destination_file")=std::string("reversedSequence.txt"),
        py::arg("buffer_size")=default_buffer_size
    );
}
|
Loading…
Reference in New Issue
Block a user