Generates a data frame containing important read alignment statistics, such as the total number of reads in the FASTQ files, the total number of alignments, and the number of primary alignments in the corresponding BAM files.

alignStats(args, fqpaths, pairEnd = TRUE, output_index = 1, subset="FileName1")

Arguments

args

Object of class SYSargs or SYSargs2, or a named character vector of BAM file paths whose element names are the sample IDs.

fqpaths

Named character vector of raw FASTQ file paths whose element names are the sample IDs. Required when class(args) is "character".

pairEnd

Logical. Set to TRUE for paired-end libraries.

output_index

A numeric index giving the position of the file in the output slot of the SYSargs2 object. Default is output_index=1.

subset

Name of one of the input variables defined in the param.yml file, for example "FileName1".

Value

data.frame with alignment statistics.

See also

clusterRun, runCommandline, and output_update

Author

Thomas Girke

Examples

###################################
## Examples with SYSargs2 object ##
###################################
## Construct SYSargs2 object from CWL param, CWL input, and targets files
targetspath <- system.file("extdata", "targets.txt", package="systemPipeR")
dir_path <- system.file("extdata/cwl", package="systemPipeR")
WF <- loadWorkflow(targets=targetspath, wf_file="hisat2/hisat2-mapping-se.cwl", 
                  input_file="hisat2/hisat2-mapping-se.yml", dir_path=dir_path)
WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
WF
#> Instance of 'SYSargs2':
#>    Slot names/accessors: 
#>       targets: 18 (M1A...V12B), targetsheader: 4 (lines)
#>       modules: 1
#>       wf: 0, clt: 1, yamlinput: 7 (inputs)
#>       input: 18, output: 18
#>       cmdlist: 18
#>    Sub Steps:
#>       1. hisat2-mapping-se (rendered: TRUE)
#> 
#> 

names(WF); modules(WF); targets(WF)[1]; cmdlist(WF)[1:2]; output(WF)
#>  [1] "targets"           "targetsheader"     "modules"          
#>  [4] "wf"                "clt"               "yamlinput"        
#>  [7] "cmdlist"           "input"             "output"           
#> [10] "files"             "inputvars"         "cmdToCwl"         
#> [13] "status"            "internal_outfiles"
#>        module1 
#> "hisat2/2.1.0" 
#> $M1A
#> $M1A$FileName
#> [1] "./data/SRR446027_1.fastq.gz"
#> 
#> $M1A$SampleName
#> [1] "M1A"
#> 
#> $M1A$Factor
#> [1] "M1"
#> 
#> $M1A$SampleLong
#> [1] "Mock.1h.A"
#> 
#> $M1A$Experiment
#> [1] 1
#> 
#> $M1A$Date
#> [1] "23-Mar-2012"
#> 
#> 
#> $M1A
#> $M1A$`hisat2-mapping-se`
#> [1] "hisat2 -S ./results/M1A.sam  -x ./data/tair10.fasta  -k 1  --min-intronlen 30  --max-intronlen 3000  -U ./data/SRR446027_1.fastq.gz --threads 4"
#> 
#> 
#> $M1B
#> $M1B$`hisat2-mapping-se`
#> [1] "hisat2 -S ./results/M1B.sam  -x ./data/tair10.fasta  -k 1  --min-intronlen 30  --max-intronlen 3000  -U ./data/SRR446028_1.fastq.gz --threads 4"
#> 
#> 
#> $M1A
#> $M1A$`hisat2-mapping-se`
#> [1] "./results/M1A.sam"
#> 
#> 
#> $M1B
#> $M1B$`hisat2-mapping-se`
#> [1] "./results/M1B.sam"
#> 
#> 
#> $A1A
#> $A1A$`hisat2-mapping-se`
#> [1] "./results/A1A.sam"
#> 
#> 
#> $A1B
#> $A1B$`hisat2-mapping-se`
#> [1] "./results/A1B.sam"
#> 
#> 
#> $V1A
#> $V1A$`hisat2-mapping-se`
#> [1] "./results/V1A.sam"
#> 
#> 
#> $V1B
#> $V1B$`hisat2-mapping-se`
#> [1] "./results/V1B.sam"
#> 
#> 
#> $M6A
#> $M6A$`hisat2-mapping-se`
#> [1] "./results/M6A.sam"
#> 
#> 
#> $M6B
#> $M6B$`hisat2-mapping-se`
#> [1] "./results/M6B.sam"
#> 
#> 
#> $A6A
#> $A6A$`hisat2-mapping-se`
#> [1] "./results/A6A.sam"
#> 
#> 
#> $A6B
#> $A6B$`hisat2-mapping-se`
#> [1] "./results/A6B.sam"
#> 
#> 
#> $V6A
#> $V6A$`hisat2-mapping-se`
#> [1] "./results/V6A.sam"
#> 
#> 
#> $V6B
#> $V6B$`hisat2-mapping-se`
#> [1] "./results/V6B.sam"
#> 
#> 
#> $M12A
#> $M12A$`hisat2-mapping-se`
#> [1] "./results/M12A.sam"
#> 
#> 
#> $M12B
#> $M12B$`hisat2-mapping-se`
#> [1] "./results/M12B.sam"
#> 
#> 
#> $A12A
#> $A12A$`hisat2-mapping-se`
#> [1] "./results/A12A.sam"
#> 
#> 
#> $A12B
#> $A12B$`hisat2-mapping-se`
#> [1] "./results/A12B.sam"
#> 
#> 
#> $V12A
#> $V12A$`hisat2-mapping-se`
#> [1] "./results/V12A.sam"
#> 
#> 
#> $V12B
#> $V12B$`hisat2-mapping-se`
#> [1] "./results/V12B.sam"
#> 
#> 

if (FALSE) {
## Execute SYSargs2 on single machine
WF <- runCommandline(args=WF, make_bam=TRUE)

## Alignment stats
read_statsDF <- alignStats(WF, subset="FileName") 
write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t")
}

##################################
## Examples with SYSargs object ##
##################################
## Construct SYSargs object from param and targets files 
param <- system.file("extdata", "hisat2.param", package="systemPipeR")
targets <- system.file("extdata", "targets.txt", package="systemPipeR")
args <- systemArgs(sysma=param, mytargets=targets)
#> Warning: path[1]="./data/SRR446027_1.fastq.gz": No such file or directory
#> Warning: path[2]="./data/SRR446028_1.fastq.gz": No such file or directory
#> Warning: path[3]="./data/SRR446029_1.fastq.gz": No such file or directory
#> Warning: path[4]="./data/SRR446030_1.fastq.gz": No such file or directory
#> Warning: path[5]="./data/SRR446031_1.fastq.gz": No such file or directory
#> Warning: path[6]="./data/SRR446032_1.fastq.gz": No such file or directory
#> Warning: path[7]="./data/SRR446033_1.fastq.gz": No such file or directory
#> Warning: path[8]="./data/SRR446034_1.fastq.gz": No such file or directory
#> Warning: path[9]="./data/SRR446035_1.fastq.gz": No such file or directory
#> Warning: path[10]="./data/SRR446036_1.fastq.gz": No such file or directory
#> Warning: path[11]="./data/SRR446037_1.fastq.gz": No such file or directory
#> Warning: path[12]="./data/SRR446038_1.fastq.gz": No such file or directory
#> Warning: path[13]="./data/SRR446039_1.fastq.gz": No such file or directory
#> Warning: path[14]="./data/SRR446040_1.fastq.gz": No such file or directory
#> Warning: path[15]="./data/SRR446041_1.fastq.gz": No such file or directory
#> Warning: path[16]="./data/SRR446042_1.fastq.gz": No such file or directory
#> Warning: path[17]="./data/SRR446043_1.fastq.gz": No such file or directory
#> Warning: path[18]="./data/SRR446044_1.fastq.gz": No such file or directory
args
#> An instance of 'SYSargs' for running 'hisat2' on 18 samples 
names(args); modules(args); cores(args); outpaths(args); sysargs(args)
#>  [1] "targetsin"     "targetsout"    "targetsheader" "modules"      
#>  [5] "software"      "cores"         "other"         "reference"    
#>  [9] "results"       "infile1"       "infile2"       "outfile1"     
#> [13] "sysargs"       "outpaths"     
#> [1] "hisat2/2.1.0"
#> [1] 4
#>                                                                                               M1A 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446027_1.fastq.gz.hisat.bam" 
#>                                                                                               M1B 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446028_1.fastq.gz.hisat.bam" 
#>                                                                                               A1A 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446029_1.fastq.gz.hisat.bam" 
#>                                                                                               A1B 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446030_1.fastq.gz.hisat.bam" 
#>                                                                                               V1A 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446031_1.fastq.gz.hisat.bam" 
#>                                                                                               V1B 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446032_1.fastq.gz.hisat.bam" 
#>                                                                                               M6A 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446033_1.fastq.gz.hisat.bam" 
#>                                                                                               M6B 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446034_1.fastq.gz.hisat.bam" 
#>                                                                                               A6A 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446035_1.fastq.gz.hisat.bam" 
#>                                                                                               A6B 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446036_1.fastq.gz.hisat.bam" 
#>                                                                                               V6A 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446037_1.fastq.gz.hisat.bam" 
#>                                                                                               V6B 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446038_1.fastq.gz.hisat.bam" 
#>                                                                                              M12A 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446039_1.fastq.gz.hisat.bam" 
#>                                                                                              M12B 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446040_1.fastq.gz.hisat.bam" 
#>                                                                                              A12A 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446041_1.fastq.gz.hisat.bam" 
#>                                                                                              A12B 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446042_1.fastq.gz.hisat.bam" 
#>                                                                                              V12A 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446043_1.fastq.gz.hisat.bam" 
#>                                                                                              V12B 
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446044_1.fastq.gz.hisat.bam" 
#>                                                                                                                                                                                                                                                                      M1A 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446027_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446027_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      M1B 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446028_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446028_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      A1A 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446029_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446029_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      A1B 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446030_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446030_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      V1A 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446031_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446031_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      V1B 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446032_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446032_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      M6A 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446033_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446033_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      M6B 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446034_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446034_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      A6A 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446035_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446035_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      A6B 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446036_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446036_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      V6A 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446037_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446037_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                      V6B 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446038_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446038_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                     M12A 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446039_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446039_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                     M12B 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446040_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446040_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                     A12A 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446041_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446041_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                     A12B 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446042_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446042_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                     V12A 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446043_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446043_1.fastq.gz " 
#>                                                                                                                                                                                                                                                                     V12B 
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446044_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446044_1.fastq.gz " 

if (FALSE) {
## Execute SYSargs on single machine
runCommandline(args=args)

## Alignment stats
read_statsDF <- alignStats(args) 
write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t")
}