Skip to content

Commit

Permalink
enable ratio merging
Browse files Browse the repository at this point in the history
  • Loading branch information
fritzsedlazeck committed Nov 26, 2019
1 parent 7eeac8e commit 07404b7
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 44 deletions.
13 changes: 8 additions & 5 deletions src/SURVIVOR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@

Parameter* Parameter::m_pInstance = NULL;

//todo: merge 1: Check all subtypes during overlap. 2: Manage split up after merge.
//todo: maybe a low mem version..?? Just storing the variant ID per input, then go back in the end..
//make file: LIBS +=-lz

void official_interface(int argc, char *argv[]) {
Expand Down Expand Up @@ -120,10 +122,10 @@ void official_interface(int argc, char *argv[]) {
if (argc == 10) {
//merge 3 SV calls from the same strain
// combine_calls_new(std::string(argv[2]), atoi(argv[3]), atoi(argv[4]), std::string(argv[5]));
combine_calls_svs(std::string(argv[2]), atoi(argv[3]), atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), atoi(argv[7]), atoi(argv[8]), std::string(argv[9]));
combine_calls_svs(std::string(argv[2]), atof(argv[3]), atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), atoi(argv[7]), atoi(argv[8]), std::string(argv[9]));
} else {
std::cerr << "File with VCF names and paths" << std::endl;
std::cerr << "max distance between breakpoints " << std::endl;
std::cerr << "max distance between breakpoints (0-1 percent of length, 1- number of bp) " << std::endl;
std::cerr << "Minimum number of supporting caller" << std::endl;
std::cerr << "Take the type into account (1==yes, else no)" << std::endl;
std::cerr << "Take the strands of SVs into account (1==yes, else no)" << std::endl;
Expand Down Expand Up @@ -236,10 +238,11 @@ void official_interface(int argc, char *argv[]) {
exit(0);

} else if (strcmp(argv[1], "genComp") == 0) {
if (argc == 4) {
summary_venn(std::string(argv[2]), std::string(argv[3]));
if (argc == 5) {
summary_venn(std::string(argv[2]), bool(atoi(argv[3])==1), std::string(argv[4]));
} else {
std::cerr << "Merged Vcf file" << std::endl;
std::cerr << "Normalize output (1==yes, else no)" << std::endl;
std::cerr << "Output: pariwise overlap matrix" << std::endl;
}
exit(0);
Expand Down Expand Up @@ -546,7 +549,7 @@ int main(int argc, char *argv[]) {
case 19:
if (argc == 4) {
//summarize venn
summary_venn(std::string(argv[2]), std::string(argv[3]));
summary_venn(std::string(argv[2]),false, std::string(argv[3]));
} else {
std::cerr << "vcf venn file" << std::endl;
std::cerr << "output file" << std::endl;
Expand Down
16 changes: 14 additions & 2 deletions src/Summarize_SV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ std::vector<int> parse_supp_vec(std::string buffer) {
return ids;
}

void summary_venn(std::string filename, std::string output) {
void summary_venn(std::string filename, bool normalize, std::string output) {
std::vector<std::vector<int> > mat;
// cout << "file: " << filename << endl;

Expand Down Expand Up @@ -600,7 +600,19 @@ void summary_venn(std::string filename, std::string output) {
FILE * file = fopen(output.c_str(), "w");
for (size_t i = 0; i < mat.size(); i++) {
for (size_t j = 0; j < mat.size(); j++) {
fprintf(file, "%i", mat[i][j]);
if(normalize){
//if(i>j){
if(mat[i][i]>0){
fprintf(file, "%e", (double)mat[i][j]/(double)mat[i][i]);
}else{
fprintf(file, "%e", 0);
}
// }else{
// fprintf(file, "%e", (double)mat[i][j]/(double)mat[i][i]);
//}
}else{
fprintf(file, "%i", mat[i][j]);
}
fprintf(file, "%c", '\t');
}
fprintf(file, "%c", '\n');
Expand Down
2 changes: 1 addition & 1 deletion src/Summarize_SV.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

using namespace std;
void summary_SV(std::string filename, int min_size, int max_size, int min_reads,std::string output);
void summary_venn(std::string filename, std::string output);
void summary_venn(std::string filename, bool normalize, std::string output);
void summary_SV_stream(int min_size, int max_size, std::string output);

#endif /* SUMMARIZE_SV_H_ */
94 changes: 63 additions & 31 deletions src/merge_vcf/IntervallTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,43 @@ bool IntervallTree::same_breakpoint(breakpoint_str first, breakpoint_str second,
return (strcmp(first.chr.c_str(), second.chr.c_str()) == 0 && (abs(first.position - second.position) < max_dist));
}

bool is_same_strand(std::pair<bool, bool> first, std::pair<bool, bool> second) {
return (first.first == second.first && first.second == second.second);
bool is_same_strand(std::pair<bool, bool> first, bool strands[]){//std::pair<bool, bool> second) {

short count=0;
if(strands[0] && first.first){
count++;
}
if(strands[1] && !first.first){
count++;
}
if(strands[2] && first.second){
count++;
}
if(strands[3] && !first.second){
count++;
}

return(bool)(count==2);


//return (first.first == second.first && first.second == second.second);
}
bool same_type(short first, short second) {
if (first == second) {
bool same_type(short first, bool types[]) {

if(types[first]==1){
return true;
} else if (((first == 3 || first == 2) && second == 5) || ((second == 3 || second == 2) && first == 5)) { // compare BND to inv or tra -> same type!
}else if(((first==3 || first ==2 ) && types[5]==1) || ((types[3]==1 || types[2]==1) && first==5)){
return true;
}

//if (first == second) {
// return true;
//} else if (((first == 3 || first == 2) && second == 5) || ((second == 3 || second == 2) && first == 5)) { // compare BND to inv or tra -> same type!
// return true;
//}
return false;
}
int get_dist(long dist, short type) {
/*int get_dist(long dist, short type) {
if (type == 3) { //TRA
return Parameter::Instance()->max_dist; //TODO: change!
Expand All @@ -32,7 +57,7 @@ int get_dist(long dist, short type) {
dist = std::max((long) Parameter::Instance()->min_length * 2, dist);
return std::min((int) (dist * 4), Parameter::Instance()->max_dist);
}
}*/

long IntervallTree::overlap_SNP(breakpoint_str start, SVS_Node * curr_svs) {

Expand Down Expand Up @@ -74,39 +99,46 @@ long IntervallTree::overlap(breakpoint_str start, breakpoint_str stop, short typ
}*/

/* if (start.position == 112179238 || start.position == 112179329) {
std::cout << "Comp: " << start.chr << " " << start.position << " " << curr_svs->first.chr << " " << curr_svs->first.position << " " << type << " " << curr_svs->type << std::endl;
}*/
std::cout << "Comp: " << start.chr << " " << start.position << " " << curr_svs->first.chr << " " << curr_svs->first.position << " " << type << " " << curr_svs->type << std::endl;
}*/

int max_dist = Parameter::Instance()->max_dist;

double max_dist = Parameter::Instance()->max_dist;
if(max_dist<1 && strcmp(start.chr.c_str(),stop.chr.c_str())!=0){
max_dist=1000;
}else if(max_dist<1){
max_dist=std::floor((double)(stop.position - start.position) * max_dist);
//std::cout<<max_dist<<std::endl;
}
/* if (Parameter::Instance()->dynamic_size) {
max_dist = std::min(get_dist(stop.position - start.position, type), get_dist(curr_svs->first.position - curr_svs->second.position, curr_svs->type)); //
}
*/
if (((!Parameter::Instance()->use_strand || is_same_strand(strands, curr_svs->strand)) && (!Parameter::Instance()->use_type || same_type(type, curr_svs->type))) && (same_breakpoint(start, curr_svs->first, max_dist) && same_breakpoint(stop, curr_svs->second, max_dist))) {
/* if (start.position == 112179238 || start.position == 112179329) {
std::cout << "MERGE" << std::endl;
std::cout << std::endl;
}*/
if (((!Parameter::Instance()->use_strand || is_same_strand(strands, curr_svs->strands)) && (!Parameter::Instance()->use_type || same_type(type, curr_svs->types))) && (same_breakpoint(start, curr_svs->first, max_dist) && same_breakpoint(stop, curr_svs->second, max_dist))) {
/* if (start.position == 112179238 || start.position == 112179329) {
std::cout << "MERGE" << std::endl;
std::cout << std::endl;
}*/
return 0; //to be merged
}

/*if (start.position == 112179238 || start.position == 112179329) {
std::cout << "no MERGE: "<<stop.chr<<" "<<stop.position<<" "<<curr_svs->second.chr<<" "<<curr_svs->second.position << std::endl;
if (is_same_strand(strands, curr_svs->strand)) {
std::cout << "\tsame strand" << std::endl;
}
if (same_type(type, curr_svs->type)) {
std::cout << "\tsame type" << std::endl;
}
std::cout << "dist: " << max_dist << std::endl;
if (same_breakpoint(start, curr_svs->first, max_dist)) {
std::cout << "\tsame breakpoint start" << std::endl;
}
if (same_breakpoint(stop, curr_svs->second, max_dist)) {
std::cout << "\tsame breakpoint stop" << std::endl;
}
std::cout << std::endl;
}*/
std::cout << "no MERGE: "<<stop.chr<<" "<<stop.position<<" "<<curr_svs->second.chr<<" "<<curr_svs->second.position << std::endl;
if (is_same_strand(strands, curr_svs->strand)) {
std::cout << "\tsame strand" << std::endl;
}
if (same_type(type, curr_svs->type)) {
std::cout << "\tsame type" << std::endl;
}
std::cout << "dist: " << max_dist << std::endl;
if (same_breakpoint(start, curr_svs->first, max_dist)) {
std::cout << "\tsame breakpoint start" << std::endl;
}
if (same_breakpoint(stop, curr_svs->second, max_dist)) {
std::cout << "\tsame breakpoint stop" << std::endl;
}
std::cout << std::endl;
}*/
/* std::cout<<"no MERGE: "<<start.position <<" "<<curr_svs->first.position <<" vs "<< stop.position << " " << curr_svs->second.position<<std::endl;
if( is_same_strand(strands, curr_svs->strand)){
std::cout<<"\tsame strand"<<std::endl;
Expand Down
2 changes: 1 addition & 1 deletion src/merge_vcf/Paramer.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class Parameter {

public:
std::string version;
int max_dist;
double max_dist;
int max_caller;
bool use_type;
bool use_strand;
Expand Down
26 changes: 26 additions & 0 deletions src/merge_vcf/TNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,20 @@ class TNode {
this->data->second = stop;
this->data->type = type;
this->data->strand = strands;
if (strands.first) {
this->data->strands[0] = true;
} else {
this->data->strands[1] = true;
}

if (strands.second) {
this->data->strands[2] = true;
} else {
this->data->strands[3] = true;
}

this->data->genotype = meta_info.genotype; //do I need this?
this->data->types[type] = true;

init();
Support_Node * tmp = new Support_Node();
Expand Down Expand Up @@ -103,6 +116,19 @@ class TNode {
this->data->caller_info.push_back(tmp);
}

this->data->types[type] = true; //extend if there is an in sample merge!
if (strands.first) {
this->data->strands[0] = true;
} else {
this->data->strands[1] = true;
}

if (strands.second) {
this->data->strands[2] = true;
} else {
this->data->strands[3] = true;
}

this->data->caller_info[index]->starts.push_back(start.position);
this->data->caller_info[index]->stops.push_back(stop.position);
this->data->caller_info[index]->types.push_back(type);
Expand Down
5 changes: 4 additions & 1 deletion src/merge_vcf/combine_svs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ void parse_vcf_header(std::map<std::string, int> &chrs, std::string filename) {
}
}

void combine_calls_svs(std::string files, int max_dist, int min_support, int type_save, int strand_save, int dynamic_size, int min_svs, std::string output) {
void combine_calls_svs(std::string files, double max_dist, int min_support, int type_save, int strand_save, int dynamic_size, int min_svs, std::string output) {
std::vector<std::string> names = parse_filename(files);

Parameter::Instance()->max_caller = names.size();
Expand Down Expand Up @@ -691,6 +691,9 @@ void combine_calls_svs(std::string files, int max_dist, int min_support, int typ
}

if (support >= min_support && len > min_svs) {
if(Parameter::Instance()->use_strand || Parameter::Instance()->use_type){

}
print_entry_overlap(file, (*i), id);
}

Expand Down
19 changes: 16 additions & 3 deletions src/merge_vcf/combine_svs.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,26 @@ class SVS_Node {
//just for testing!

SVS_Node() {

type=-1;
num_support.first=-1;
num_support.second=-1;
strand.first=false;
strand.second=false;
caller_info.clear();
genotype="./.";

types[0]=false; //DEL
types[1]=false; //DUP
types[2]=false; //INV
types[3]=false; //TRA
types[4]=false; //UNK

strands[0]=false; //+
strands[1]=false; //-
strands[2]=false; //+
strands[3]=false; //-

}
~SVS_Node() {
caller_info.clear();
Expand All @@ -94,15 +107,15 @@ class SVS_Node {
std::pair<int,int> num_support;
std::pair<bool,bool> strand;
std::string genotype;


bool types[5];
bool strands[4];
};

#include "../structs.h"
#include "IntervallTree.h"
#include "../vcfs/Merge_VCF.h"

void combine_calls_svs(std::string file, int max_dist, int min_support, int type_save, int strand_save,int dynamic_size,int min_svs, std::string output);
void combine_calls_svs(std::string file, double max_dist, int min_support, int type_save, int strand_save,int dynamic_size,int min_svs, std::string output);
breakpoint_str convert_position(strcoordinate pos);
void summarize_VCF_files(std::string filename, int min_size, std::string output);
void print_entry_overlap(FILE *& file, SVS_Node * entry, int id);
Expand Down

0 comments on commit 07404b7

Please sign in to comment.