alignStats.Rd
Generates a data frame containing important read alignment statistics, such as the total number of reads in the FASTQ files, the total number of alignments, and the number of primary alignments in the corresponding BAM files.
alignStats(args, fqpaths, pairEnd = TRUE, output_index = 1, subset="FileName1")
Object of class SYSargs
or SYSargs2
or named character vector
containing the paths to the BAM files; the element names should be the sample IDs.
named character vector
containing the paths to the raw FASTQ files; the element names should be the sample IDs.
Required when class(args)
is "character".
logical. For paired-end libraries, select TRUE
.
A numeric index giving the position of the file in the SYSargs2
object, slot output
. Default is output_index=1
.
subset
is the name of a variable defined in the param.yml
file, for example "FileName1".
data.frame
with alignment statistics.
clusterRun
and runCommandline
and output_update
###################################
## Examples with SYSargs2 object ##
###################################
## Construct SYSargs2 object from CWL param, CWL input, and targets files
targetspath <- system.file("extdata", "targets.txt", package="systemPipeR")
dir_path <- system.file("extdata/cwl", package="systemPipeR")
WF <- loadWorkflow(targets=targetspath, wf_file="hisat2/hisat2-mapping-se.cwl",
input_file="hisat2/hisat2-mapping-se.yml", dir_path=dir_path)
WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
WF
#> Instance of 'SYSargs2':
#> Slot names/accessors:
#> targets: 18 (M1A...V12B), targetsheader: 4 (lines)
#> modules: 1
#> wf: 0, clt: 1, yamlinput: 7 (inputs)
#> input: 18, output: 18
#> cmdlist: 18
#> Sub Steps:
#> 1. hisat2-mapping-se (rendered: TRUE)
#>
#>
names(WF); modules(WF); targets(WF)[1]; cmdlist(WF)[1:2]; output(WF)
#> [1] "targets" "targetsheader" "modules"
#> [4] "wf" "clt" "yamlinput"
#> [7] "cmdlist" "input" "output"
#> [10] "files" "inputvars" "cmdToCwl"
#> [13] "status" "internal_outfiles"
#> module1
#> "hisat2/2.1.0"
#> $M1A
#> $M1A$FileName
#> [1] "./data/SRR446027_1.fastq.gz"
#>
#> $M1A$SampleName
#> [1] "M1A"
#>
#> $M1A$Factor
#> [1] "M1"
#>
#> $M1A$SampleLong
#> [1] "Mock.1h.A"
#>
#> $M1A$Experiment
#> [1] 1
#>
#> $M1A$Date
#> [1] "23-Mar-2012"
#>
#>
#> $M1A
#> $M1A$`hisat2-mapping-se`
#> [1] "hisat2 -S ./results/M1A.sam -x ./data/tair10.fasta -k 1 --min-intronlen 30 --max-intronlen 3000 -U ./data/SRR446027_1.fastq.gz --threads 4"
#>
#>
#> $M1B
#> $M1B$`hisat2-mapping-se`
#> [1] "hisat2 -S ./results/M1B.sam -x ./data/tair10.fasta -k 1 --min-intronlen 30 --max-intronlen 3000 -U ./data/SRR446028_1.fastq.gz --threads 4"
#>
#>
#> $M1A
#> $M1A$`hisat2-mapping-se`
#> [1] "./results/M1A.sam"
#>
#>
#> $M1B
#> $M1B$`hisat2-mapping-se`
#> [1] "./results/M1B.sam"
#>
#>
#> $A1A
#> $A1A$`hisat2-mapping-se`
#> [1] "./results/A1A.sam"
#>
#>
#> $A1B
#> $A1B$`hisat2-mapping-se`
#> [1] "./results/A1B.sam"
#>
#>
#> $V1A
#> $V1A$`hisat2-mapping-se`
#> [1] "./results/V1A.sam"
#>
#>
#> $V1B
#> $V1B$`hisat2-mapping-se`
#> [1] "./results/V1B.sam"
#>
#>
#> $M6A
#> $M6A$`hisat2-mapping-se`
#> [1] "./results/M6A.sam"
#>
#>
#> $M6B
#> $M6B$`hisat2-mapping-se`
#> [1] "./results/M6B.sam"
#>
#>
#> $A6A
#> $A6A$`hisat2-mapping-se`
#> [1] "./results/A6A.sam"
#>
#>
#> $A6B
#> $A6B$`hisat2-mapping-se`
#> [1] "./results/A6B.sam"
#>
#>
#> $V6A
#> $V6A$`hisat2-mapping-se`
#> [1] "./results/V6A.sam"
#>
#>
#> $V6B
#> $V6B$`hisat2-mapping-se`
#> [1] "./results/V6B.sam"
#>
#>
#> $M12A
#> $M12A$`hisat2-mapping-se`
#> [1] "./results/M12A.sam"
#>
#>
#> $M12B
#> $M12B$`hisat2-mapping-se`
#> [1] "./results/M12B.sam"
#>
#>
#> $A12A
#> $A12A$`hisat2-mapping-se`
#> [1] "./results/A12A.sam"
#>
#>
#> $A12B
#> $A12B$`hisat2-mapping-se`
#> [1] "./results/A12B.sam"
#>
#>
#> $V12A
#> $V12A$`hisat2-mapping-se`
#> [1] "./results/V12A.sam"
#>
#>
#> $V12B
#> $V12B$`hisat2-mapping-se`
#> [1] "./results/V12B.sam"
#>
#>
if (FALSE) {
## Execute SYSargs2 on a single machine
WF <- runCommandline(args=WF, make_bam=TRUE)
## Alignment stats
read_statsDF <- alignStats(WF, subset="FileName")
write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t")
}
##################################
## Examples with SYSargs object ##
##################################
## Construct SYSargs object from param and targets files
param <- system.file("extdata", "hisat2.param", package="systemPipeR")
targets <- system.file("extdata", "targets.txt", package="systemPipeR")
args <- systemArgs(sysma=param, mytargets=targets)
#> Warning: path[1]="./data/SRR446027_1.fastq.gz": No such file or directory
#> Warning: path[2]="./data/SRR446028_1.fastq.gz": No such file or directory
#> Warning: path[3]="./data/SRR446029_1.fastq.gz": No such file or directory
#> Warning: path[4]="./data/SRR446030_1.fastq.gz": No such file or directory
#> Warning: path[5]="./data/SRR446031_1.fastq.gz": No such file or directory
#> Warning: path[6]="./data/SRR446032_1.fastq.gz": No such file or directory
#> Warning: path[7]="./data/SRR446033_1.fastq.gz": No such file or directory
#> Warning: path[8]="./data/SRR446034_1.fastq.gz": No such file or directory
#> Warning: path[9]="./data/SRR446035_1.fastq.gz": No such file or directory
#> Warning: path[10]="./data/SRR446036_1.fastq.gz": No such file or directory
#> Warning: path[11]="./data/SRR446037_1.fastq.gz": No such file or directory
#> Warning: path[12]="./data/SRR446038_1.fastq.gz": No such file or directory
#> Warning: path[13]="./data/SRR446039_1.fastq.gz": No such file or directory
#> Warning: path[14]="./data/SRR446040_1.fastq.gz": No such file or directory
#> Warning: path[15]="./data/SRR446041_1.fastq.gz": No such file or directory
#> Warning: path[16]="./data/SRR446042_1.fastq.gz": No such file or directory
#> Warning: path[17]="./data/SRR446043_1.fastq.gz": No such file or directory
#> Warning: path[18]="./data/SRR446044_1.fastq.gz": No such file or directory
args
#> An instance of 'SYSargs' for running 'hisat2' on 18 samples
names(args); modules(args); cores(args); outpaths(args); sysargs(args)
#> [1] "targetsin" "targetsout" "targetsheader" "modules"
#> [5] "software" "cores" "other" "reference"
#> [9] "results" "infile1" "infile2" "outfile1"
#> [13] "sysargs" "outpaths"
#> [1] "hisat2/2.1.0"
#> [1] 4
#> M1A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446027_1.fastq.gz.hisat.bam"
#> M1B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446028_1.fastq.gz.hisat.bam"
#> A1A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446029_1.fastq.gz.hisat.bam"
#> A1B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446030_1.fastq.gz.hisat.bam"
#> V1A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446031_1.fastq.gz.hisat.bam"
#> V1B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446032_1.fastq.gz.hisat.bam"
#> M6A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446033_1.fastq.gz.hisat.bam"
#> M6B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446034_1.fastq.gz.hisat.bam"
#> A6A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446035_1.fastq.gz.hisat.bam"
#> A6B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446036_1.fastq.gz.hisat.bam"
#> V6A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446037_1.fastq.gz.hisat.bam"
#> V6B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446038_1.fastq.gz.hisat.bam"
#> M12A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446039_1.fastq.gz.hisat.bam"
#> M12B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446040_1.fastq.gz.hisat.bam"
#> A12A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446041_1.fastq.gz.hisat.bam"
#> A12B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446042_1.fastq.gz.hisat.bam"
#> V12A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446043_1.fastq.gz.hisat.bam"
#> V12B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446044_1.fastq.gz.hisat.bam"
#> M1A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446027_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446027_1.fastq.gz "
#> M1B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446028_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446028_1.fastq.gz "
#> A1A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446029_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446029_1.fastq.gz "
#> A1B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446030_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446030_1.fastq.gz "
#> V1A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446031_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446031_1.fastq.gz "
#> V1B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446032_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446032_1.fastq.gz "
#> M6A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446033_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446033_1.fastq.gz "
#> M6B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446034_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446034_1.fastq.gz "
#> A6A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446035_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446035_1.fastq.gz "
#> A6B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446036_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446036_1.fastq.gz "
#> V6A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446037_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446037_1.fastq.gz "
#> V6B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446038_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446038_1.fastq.gz "
#> M12A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446039_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446039_1.fastq.gz "
#> M12B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446040_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446040_1.fastq.gz "
#> A12A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446041_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446041_1.fastq.gz "
#> A12B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446042_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446042_1.fastq.gz "
#> V12A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446043_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446043_1.fastq.gz "
#> V12B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446044_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446044_1.fastq.gz "
if (FALSE) {
## Execute SYSargs on a single machine
runCommandline(args=args)
## Alignment stats
read_statsDF <- alignStats(args)
write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t")
}