clusterRun.Rd
Submits non-R command-line software to queueing/scheduling systems of compute
clusters using run specifications defined by functions similar to
runCommandline
. clusterRun
can be used with most queueing systems
since it is based on utilities from the batchtools
package which supports
the use of template files (*.tmpl
) for defining the run parameters of
the different schedulers. The path to the *.tmpl
file needs to be
specified in a conf file provided under the conffile
argument.
clusterRun(args,
FUN = runCommandline,
more.args = list(args = args, make_bam = TRUE),
conffile = ".batchtools.conf.R",
template = "batchtools.slurm.tmpl",
Njobs,
runid = "01",
resourceList)
Object of class SYSargs
or SYSargs2
.
Accepts functions such as runCommandline(args, ...)
where the
args
argument is mandatory and needs to be of class SYSargs
or SYSargs2
.
Object of class list
, which provides the arguments that control the
FUN
function.
Path to conf file (default location ./.batchtools.conf.R
). This file
contains in its simplest form just one command, such as this line for the
Slurm scheduler: cluster.functions <- makeClusterFunctionsSlurm(template="batchtools.slurm.tmpl")
.
For more detailed information visit this page: https://mllg.github.io/batchtools/index.html
The template files for a specific queueing/scheduling system can be downloaded from here: https://github.com/mllg/batchtools/tree/master/inst/templates. Slurm, PBS/Torque, and Sun Grid Engine (SGE) templates are provided.
Integer defining the number of cluster jobs. For instance, if args
contains 18 command-line jobs and Njobs=9
, then the function will
distribute them across 9 cluster jobs each running 2 command-line jobs.
To increase the number of CPU cores used by each process, one can do this
under the corresponding argument of the command-line tool, e.g. -p
argument for Tophat.
Run identifier used for log file to track system call commands.
Default is "01"
.
List
for reserving for each cluster job sufficient computing resources
including memory (Megabyte), number of nodes, CPU cores, walltime (minutes),
etc. For more details, one can consult the template file for each
queueing/scheduling system.
Object of class Registry
, as well as files and directories
created by the executed command-line tools.
For more details on batchtools
, please consult the following
page: https://github.com/mllg/batchtools/
clusterRun
replaces the older functions getQsubargs
and qsubRun
.
####################################
## Examples with a SYSargs object ##
####################################
## Locate the example param and targets files shipped with systemPipeR and
## build a SYSargs object from them
param <- system.file(
  "extdata", "hisat2.param",
  package = "systemPipeR"
)
targets <- system.file(
  "extdata", "targets.txt",
  package = "systemPipeR"
)
args <- systemArgs(sysma = param, mytargets = targets)
#> Warning: path[1]="./data/SRR446027_1.fastq.gz": No such file or directory
#> Warning: path[2]="./data/SRR446028_1.fastq.gz": No such file or directory
#> Warning: path[3]="./data/SRR446029_1.fastq.gz": No such file or directory
#> Warning: path[4]="./data/SRR446030_1.fastq.gz": No such file or directory
#> Warning: path[5]="./data/SRR446031_1.fastq.gz": No such file or directory
#> Warning: path[6]="./data/SRR446032_1.fastq.gz": No such file or directory
#> Warning: path[7]="./data/SRR446033_1.fastq.gz": No such file or directory
#> Warning: path[8]="./data/SRR446034_1.fastq.gz": No such file or directory
#> Warning: path[9]="./data/SRR446035_1.fastq.gz": No such file or directory
#> Warning: path[10]="./data/SRR446036_1.fastq.gz": No such file or directory
#> Warning: path[11]="./data/SRR446037_1.fastq.gz": No such file or directory
#> Warning: path[12]="./data/SRR446038_1.fastq.gz": No such file or directory
#> Warning: path[13]="./data/SRR446039_1.fastq.gz": No such file or directory
#> Warning: path[14]="./data/SRR446040_1.fastq.gz": No such file or directory
#> Warning: path[15]="./data/SRR446041_1.fastq.gz": No such file or directory
#> Warning: path[16]="./data/SRR446042_1.fastq.gz": No such file or directory
#> Warning: path[17]="./data/SRR446043_1.fastq.gz": No such file or directory
#> Warning: path[18]="./data/SRR446044_1.fastq.gz": No such file or directory
## Print the one-line summary of the SYSargs object
args
#> An instance of 'SYSargs' for running 'hisat2' on 18 samples
## Inspect, in this order: slot names, loaded modules, number of cores,
## output file paths, and the assembled command-line calls
names(args); modules(args); cores(args); outpaths(args); sysargs(args)
#> [1] "targetsin" "targetsout" "targetsheader" "modules"
#> [5] "software" "cores" "other" "reference"
#> [9] "results" "infile1" "infile2" "outfile1"
#> [13] "sysargs" "outpaths"
#> [1] "hisat2/2.1.0"
#> [1] 4
#> M1A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446027_1.fastq.gz.hisat.bam"
#> M1B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446028_1.fastq.gz.hisat.bam"
#> A1A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446029_1.fastq.gz.hisat.bam"
#> A1B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446030_1.fastq.gz.hisat.bam"
#> V1A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446031_1.fastq.gz.hisat.bam"
#> V1B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446032_1.fastq.gz.hisat.bam"
#> M6A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446033_1.fastq.gz.hisat.bam"
#> M6B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446034_1.fastq.gz.hisat.bam"
#> A6A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446035_1.fastq.gz.hisat.bam"
#> A6B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446036_1.fastq.gz.hisat.bam"
#> V6A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446037_1.fastq.gz.hisat.bam"
#> V6B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446038_1.fastq.gz.hisat.bam"
#> M12A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446039_1.fastq.gz.hisat.bam"
#> M12B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446040_1.fastq.gz.hisat.bam"
#> A12A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446041_1.fastq.gz.hisat.bam"
#> A12B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446042_1.fastq.gz.hisat.bam"
#> V12A
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446043_1.fastq.gz.hisat.bam"
#> V12B
#> "/home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446044_1.fastq.gz.hisat.bam"
#> M1A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446027_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446027_1.fastq.gz "
#> M1B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446028_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446028_1.fastq.gz "
#> A1A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446029_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446029_1.fastq.gz "
#> A1B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446030_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446030_1.fastq.gz "
#> V1A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446031_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446031_1.fastq.gz "
#> V1B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446032_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446032_1.fastq.gz "
#> M6A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446033_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446033_1.fastq.gz "
#> M6B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446034_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446034_1.fastq.gz "
#> A6A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446035_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446035_1.fastq.gz "
#> A6B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446036_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446036_1.fastq.gz "
#> V6A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446037_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446037_1.fastq.gz "
#> V6B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446038_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446038_1.fastq.gz "
#> M12A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446039_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446039_1.fastq.gz "
#> M12B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446040_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446040_1.fastq.gz "
#> A12A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446041_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446041_1.fastq.gz "
#> A12B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446042_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446042_1.fastq.gz "
#> V12A
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446043_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446043_1.fastq.gz "
#> V12B
#> "hisat2 -p 4 -k 1 --min-intronlen 30 --max-intronlen 3000 -S /home/runner/work/systemPipeR/systemPipeR/docs/reference/results/SRR446044_1.fastq.gz.hisat.sam /home/runner/work/systemPipeR/systemPipeR/docs/reference/data/tair10.fasta -U ./data/SRR446044_1.fastq.gz "
if (FALSE) {
  ## Run the SYSargs command-line jobs on several nodes of a compute cluster.
  ## This example relies on the conf and template files for the Slurm
  ## scheduler; see the instructions for obtaining the equivalent files for
  ## other schedulers.

  ## Resources requested for each cluster job
  resources <- list(
    walltime = 120,
    ntasks = 1,
    ncpus = cores(args),
    memory = 1024
  )

  ## Copy the Slurm conf and template files into the working directory
  file.copy(
    system.file("extdata", ".batchtools.conf.R", package = "systemPipeR"),
    "."
  )
  file.copy(
    system.file("extdata", "batchtools.slurm.tmpl", package = "systemPipeR"),
    "."
  )

  ## Submit the jobs; the returned registry is used for monitoring below
  reg <- clusterRun(
    args,
    FUN = runCommandline,
    more.args = list(args = args, make_bam = TRUE),
    conffile = ".batchtools.conf.R",
    template = "batchtools.slurm.tmpl",
    Njobs = 18,
    runid = "01",
    resourceList = resources
  )

  ## Monitor progress of submitted jobs
  getStatus(reg = reg)
  file.exists(outpaths(args))
}
#####################################
## Examples with a SYSargs2 object ##
#####################################
## Build a SYSargs2 object from the CWL param file, the CWL input (yml) file,
## and the targets file
targets <- system.file(
  "extdata", "targets.txt",
  package = "systemPipeR"
)
dir_path <- system.file("extdata/cwl", package = "systemPipeR")
WF <- loadWorkflow(
  targets = targets,
  wf_file = "hisat2/hisat2-mapping-se.cwl",
  input_file = "hisat2/hisat2-mapping-se.yml",
  dir_path = dir_path
)
## Fill the placeholders of the input file with the targets columns
WF <- renderWF(
  WF,
  inputvars = c(FileName = "_FASTQ_PATH1_", SampleName = "_SampleName_")
)
WF
#> Instance of 'SYSargs2':
#> Slot names/accessors:
#> targets: 18 (M1A...V12B), targetsheader: 4 (lines)
#> modules: 1
#> wf: 0, clt: 1, yamlinput: 7 (inputs)
#> input: 18, output: 18
#> cmdlist: 18
#> Sub Steps:
#> 1. hisat2-mapping-se (rendered: TRUE)
#>
#>
## Inspect, in this order: slot names, loaded modules, the first target entry,
## the first two command lists, and all output paths
names(WF); modules(WF); targets(WF)[1]; cmdlist(WF)[1:2]; output(WF)
#> [1] "targets" "targetsheader" "modules"
#> [4] "wf" "clt" "yamlinput"
#> [7] "cmdlist" "input" "output"
#> [10] "files" "inputvars" "cmdToCwl"
#> [13] "status" "internal_outfiles"
#> module1
#> "hisat2/2.1.0"
#> $M1A
#> $M1A$FileName
#> [1] "./data/SRR446027_1.fastq.gz"
#>
#> $M1A$SampleName
#> [1] "M1A"
#>
#> $M1A$Factor
#> [1] "M1"
#>
#> $M1A$SampleLong
#> [1] "Mock.1h.A"
#>
#> $M1A$Experiment
#> [1] 1
#>
#> $M1A$Date
#> [1] "23-Mar-2012"
#>
#>
#> $M1A
#> $M1A$`hisat2-mapping-se`
#> [1] "hisat2 -S ./results/M1A.sam -x ./data/tair10.fasta -k 1 --min-intronlen 30 --max-intronlen 3000 -U ./data/SRR446027_1.fastq.gz --threads 4"
#>
#>
#> $M1B
#> $M1B$`hisat2-mapping-se`
#> [1] "hisat2 -S ./results/M1B.sam -x ./data/tair10.fasta -k 1 --min-intronlen 30 --max-intronlen 3000 -U ./data/SRR446028_1.fastq.gz --threads 4"
#>
#>
#> $M1A
#> $M1A$`hisat2-mapping-se`
#> [1] "./results/M1A.sam"
#>
#>
#> $M1B
#> $M1B$`hisat2-mapping-se`
#> [1] "./results/M1B.sam"
#>
#>
#> $A1A
#> $A1A$`hisat2-mapping-se`
#> [1] "./results/A1A.sam"
#>
#>
#> $A1B
#> $A1B$`hisat2-mapping-se`
#> [1] "./results/A1B.sam"
#>
#>
#> $V1A
#> $V1A$`hisat2-mapping-se`
#> [1] "./results/V1A.sam"
#>
#>
#> $V1B
#> $V1B$`hisat2-mapping-se`
#> [1] "./results/V1B.sam"
#>
#>
#> $M6A
#> $M6A$`hisat2-mapping-se`
#> [1] "./results/M6A.sam"
#>
#>
#> $M6B
#> $M6B$`hisat2-mapping-se`
#> [1] "./results/M6B.sam"
#>
#>
#> $A6A
#> $A6A$`hisat2-mapping-se`
#> [1] "./results/A6A.sam"
#>
#>
#> $A6B
#> $A6B$`hisat2-mapping-se`
#> [1] "./results/A6B.sam"
#>
#>
#> $V6A
#> $V6A$`hisat2-mapping-se`
#> [1] "./results/V6A.sam"
#>
#>
#> $V6B
#> $V6B$`hisat2-mapping-se`
#> [1] "./results/V6B.sam"
#>
#>
#> $M12A
#> $M12A$`hisat2-mapping-se`
#> [1] "./results/M12A.sam"
#>
#>
#> $M12B
#> $M12B$`hisat2-mapping-se`
#> [1] "./results/M12B.sam"
#>
#>
#> $A12A
#> $A12A$`hisat2-mapping-se`
#> [1] "./results/A12A.sam"
#>
#>
#> $A12B
#> $A12B$`hisat2-mapping-se`
#> [1] "./results/A12B.sam"
#>
#>
#> $V12A
#> $V12A$`hisat2-mapping-se`
#> [1] "./results/V12A.sam"
#>
#>
#> $V12B
#> $V12B$`hisat2-mapping-se`
#> [1] "./results/V12B.sam"
#>
#>
if (FALSE) {
  ## Execute SYSargs2 on multiple machines of a compute cluster. The following
  ## example uses the conf and template files for the Slurm scheduler. Please
  ## read the instructions on how to obtain the corresponding files for other
  ## schedulers.
  file.copy(system.file("extdata", ".batchtools.conf.R", package = "systemPipeR"), ".")
  file.copy(system.file("extdata", "batchtools.slurm.tmpl", package = "systemPipeR"), ".")

  ## Resources requested for each cluster job
  resources <- list(walltime = 120, ntasks = 1, ncpus = 4, memory = 1024)
  reg <- clusterRun(WF, FUN = runCommandline,
                    more.args = list(args = WF, make_bam = TRUE),
                    conffile = ".batchtools.conf.R",
                    template = "batchtools.slurm.tmpl",
                    Njobs = 18, runid = "01", resourceList = resources)

  ## Monitor progress of submitted jobs
  getStatus(reg = reg)

  ## Updates the path in the object output(WF)
  ## NOTE(review): presumably switches the recorded outputs from .sam to .bam
  ## because make_bam = TRUE above -- confirm against output_update().
  WF <- output_update(WF, dir = FALSE, replace = TRUE, extension = c(".sam", ".bam"))

  ## Alignment stats
  read_statsDF <- alignStats(WF)
  ## `targets` only holds the *path* to the targets file (a character string),
  ## so `targets$FileName` would fail. Read the table first; lines starting
  ## with "#" are skipped (assumes the targets header lines use that prefix --
  ## confirm for custom targets files).
  targets_df <- read.delim(targets, comment.char = "#")
  read_statsDF <- cbind(read_statsDF[targets_df$FileName, ], targets_df)
  write.table(read_statsDF, "results/alignStats.xls", row.names = FALSE,
              quote = FALSE, sep = "\t")
}