-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile_SE
136 lines (127 loc) · 5.4 KB
/
Snakefile_SE
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import os
project_dir="PATH/single_end/paper1"
input_se,=glob_wildcards(os.path.join(project_dir, "raw_reads_SE/{sample}.fq.gz"))
cdna_fasta="PATH/FILENAME.fa"
species="indexed.fa"
length="42" # This parameter is necessary for Kallisto. What is the average length of your reads after trimming? You can look at the MultiQC report in Step 2 below and readjust this value later and re-run the analysis. If you have questions, please read the [kallisto documentation](https://pachterlab.github.io/kallisto/manual).
sd="8" # This parameter is necessary for Kallisto. What is the average length of your reads after trimming? You can look at the MultiQC report in Step 2 below and readjust this value later and re-run the analysis. If you have questions, please read the [kallisto documentation](https://pachterlab.github.io/kallisto/manual).
trimmer_setting=["ILLUMINACLIP:adapters/adapter.fasta:2:30:10:2:True LEADING:26 TRAILING:26 SLIDINGWINDOW:4:20 MINLEN:36"] # See the manual if you want to change: https://github.com/usadellab/Trimmomatic
######################################################################################################
rule all:
input:
###### Step 1: QC raw reads
os.path.join(project_dir,"quality_control", "raw_reads_SE_fastqc_summary.html"),
###### Step 2: Filter, trim, and QC of trimmed
expand(os.path.join(project_dir,"trimmed_reads_SE/{sample}.fq.gz"), sample=input_se),
os.path.join(project_dir,"quality_control","trimmed_reads_SE_fastqc_summary.html"),
###### Step 3: Quantifying
expand(os.path.join(project_dir,"logs", "kallisto_quant_{sample}.log"), sample=input_se),
filename=os.path.join(project_dir,"quality_control","everything_summary.html")
rule multiqc_all:
input:
order=expand(os.path.join(project_dir,"logs", "kallisto_quant_{sample}.log"), sample=input_se)
output:
filename=os.path.join(project_dir,"quality_control","everything_summary.html")
conda: "envs/multiqc.yaml"
params:
everything=project_dir,
results_folder=os.path.join(project_dir, "quality_control/")
shell:
"""
multiqc -o {params.results_folder} --filename {output.filename} {params.everything}
"""
rule kallisto_quant:
input:
fastq_1 = os.path.join(project_dir, "trimmed_reads_SE/{sample}.fq.gz"),
index = os.path.join(project_dir,"kallisto_index", species + ".idx")
output:
directory(os.path.join(project_dir, "kallisto", "{sample}"))
params:
extra = "--bootstrap-samples 100 --single -l "+length+" -s "+sd
conda: "envs/kallisto.yaml"
log:
os.path.join(project_dir,"logs", "kallisto_quant_{sample}.log")
threads: 20
shell:
"""
kallisto quant {params.extra} -t {threads} -i {input.index} -o {output} {input.fastq_1} &> {log}
"""
rule kallisto_index:
input:
fasta = cdna_fasta
output:
index = os.path.join(project_dir,"kallisto_index", species + ".idx")
params:
extra = "--kmer-size=31"
log:
os.path.join(project_dir, "logs/kallisto_index.log")
threads: 10
conda: "envs/kallisto.yaml"
shell:
"""
kallisto index {params.extra} -i {output.index} {input.fasta}
"""
rule multiqc_trimmed_reads:
input:
order=expand(os.path.join(project_dir,"quality_control/trimmed_reads_SE/{sample}_fastqc.zip"), sample=input_se)
output:
filename=os.path.join(project_dir,"quality_control","trimmed_reads_SE_fastqc_summary.html")
conda: "envs/multiqc.yaml"
params:
trimmed_reads=os.path.join(project_dir,"quality_control/trimmed_reads_SE", ""),
results_folder=os.path.join(project_dir, "quality_control/")
shell:
"""
multiqc -o {params.results_folder} --filename {output.filename} {params.trimmed_reads}
"""
rule fastqc_trimmed_reads:
input:
trimmed=os.path.join(project_dir,"trimmed_reads_SE/{sample}.fq.gz")
output:
html=os.path.join(project_dir,"quality_control/trimmed_reads_SE/{sample}_fastqc.html"),
zip=os.path.join(project_dir,"quality_control/trimmed_reads_SE/{sample}_fastqc.zip")
params:
folder=os.path.join(project_dir, "quality_control/trimmed_reads_SE",""),
conda: "envs/fastqc.yaml"
shell:
"""
fastqc {input} -o {params.folder}
"""
rule trimmomatic:
input:
r1=os.path.join(project_dir,"raw_reads_SE/{sample}.fq.gz"),
output:
r1=os.path.join(project_dir,"trimmed_reads_SE/{sample}.fq.gz")
conda: "envs/trimmomatic.yaml"
params:
trimmer=trimmer_setting
shell:
"""
trimmomatic SE -threads 8 {input.r1} {output.r1} {params.trimmer}
"""
rule multiqc_raw_reads:
input:
order=expand(os.path.join(project_dir,"quality_control/raw_reads_SE/{sample}_fastqc.zip"), sample=input_se)
output:
filename=os.path.join(project_dir,"quality_control","raw_reads_SE_fastqc_summary.html"),
conda: "envs/multiqc.yaml"
params:
raw_reads=os.path.join(project_dir,"quality_control/raw_reads_SE", ""),
results_folder=os.path.join(project_dir, "quality_control/")
shell:
"""
multiqc -o {params.results_folder} --filename {output.filename} {params.raw_reads}
"""
rule fastqc_raw_reads:
input:
os.path.join(project_dir,"raw_reads_SE/{sample}.fq.gz")
output:
html=os.path.join(project_dir,"quality_control/raw_reads_SE/{sample}_fastqc.html"),
zip=os.path.join(project_dir,"quality_control/raw_reads_SE/{sample}_fastqc.zip")
params:
folder=os.path.join(project_dir, "quality_control/raw_reads_SE",""),
conda: "envs/fastqc.yaml"
shell:
"""
fastqc {input} -o {params.folder}
"""