rocksdb/build_tools/gnu_parallel
Peter Dillinger ea3aa60dcc More improvements to output for CircleCI (#9201)
Summary:
More follow-up to https://github.com/facebook/rocksdb/issues/9193 + https://github.com/facebook/rocksdb/issues/9188
* Even though we need to print ETA updates to avoid hitting the 10min
timeout, we need to avoid printing an update if there's no actual
progress, so that hung tests will timeout after 10 min rather than 5
hours.
* When there is a hung test, it's really annoying to track down which
test is hung, so if no progress is observed for 1 minute, we run ps once
to show what is running.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9201

Test Plan: manual and CircleCI

Reviewed By: jay-zhuang

Differential Revision: D32612028

Pulled By: pdillinger

fbshipit-source-id: 00f8ea70fc5fec9ede28ff74287d90fc73854aad
2021-11-23 22:10:27 -08:00

7971 lines
240 KiB
Perl
Executable file

#!/usr/bin/env perl
# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and
# Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# or write to the Free Software Foundation, Inc., 51 Franklin St,
# Fifth Floor, Boston, MA 02110-1301 USA
# open3 used in Job::start
use IPC::Open3;
# &WNOHANG used in reaper
use POSIX qw(:sys_wait_h setsid ceil :errno_h);
# gensym used in Job::start
use Symbol qw(gensym);
# tempfile used in Job::start
use File::Temp qw(tempfile tempdir);
# mkpath used in openresultsfile
use File::Path;
# GetOptions used in get_options_from_array
use Getopt::Long;
# Used to ensure code quality
use strict;
use File::Basename;
if(not $ENV{HOME}) {
# $ENV{HOME} is sometimes not set if called from PHP
::warning("\$HOME not set. Using /tmp\n");
$ENV{HOME} = "/tmp";
}
save_stdin_stdout_stderr();
save_original_signal_handler();
parse_options();
::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
my $number_of_args;
if($Global::max_number_of_args) {
$number_of_args=$Global::max_number_of_args;
} elsif ($opt::X or $opt::m or $opt::xargs) {
$number_of_args = undef;
} else {
$number_of_args = 1;
}
my @command;
@command = @ARGV;
my @fhlist;
if($opt::pipepart) {
@fhlist = map { open_or_exit($_) } "/dev/null";
} else {
@fhlist = map { open_or_exit($_) } @opt::a;
if(not @fhlist and not $opt::pipe) {
@fhlist = (*STDIN);
}
}
if($opt::skip_first_line) {
# Skip the first line for the first file handle
my $fh = $fhlist[0];
<$fh>;
}
if($opt::header and not $opt::pipe) {
my $fh = $fhlist[0];
# split with colsep or \t
# $header force $colsep = \t if undef?
my $delimiter = $opt::colsep;
$delimiter ||= "\$";
my $id = 1;
for my $fh (@fhlist) {
my $line = <$fh>;
chomp($line);
::debug("init", "Delimiter: '$delimiter'");
for my $s (split /$delimiter/o, $line) {
::debug("init", "Colname: '$s'");
# Replace {colname} with {2}
# TODO accept configurable short hands
# TODO how to deal with headers in {=...=}
for(@command) {
s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
}
$Global::input_source_header{$id} = $s;
$id++;
}
}
} else {
my $id = 1;
for my $fh (@fhlist) {
$Global::input_source_header{$id} = $id;
$id++;
}
}
if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
# Parallel check all hosts are up. Remove hosts that are down
filter_hosts();
}
if($opt::nonall or $opt::onall) {
onall(@command);
wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
}
# TODO --transfer foo/./bar --cleanup
# multiple --transfer and --basefile with different /./
$Global::JobQueue = JobQueue->new(
\@command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files);
if($opt::eta or $opt::bar) {
# Count the number of jobs before starting any
$Global::JobQueue->total_jobs();
}
if($opt::pipepart) {
@Global::cat_partials = map { pipe_part_files($_) } @opt::a;
# Unget the command as many times as there are parts
$Global::JobQueue->{'commandlinequeue'}->unget(
map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials
);
}
for my $sshlogin (values %Global::host) {
$sshlogin->max_jobs_running();
}
init_run_jobs();
my $sem;
if($Global::semaphore) {
$sem = acquire_semaphore();
}
$SIG{TERM} = \&start_no_new_jobs;
start_more_jobs();
if(not $opt::pipepart) {
if($opt::pipe) {
spreadstdin();
}
}
::debug("init", "Start draining\n");
drain_job_queue();
::debug("init", "Done draining\n");
reaper();
::debug("init", "Done reaping\n");
if($opt::pipe and @opt::a) {
for my $job (@Global::tee_jobs) {
unlink $job->fh(2,"name");
$job->set_fh(2,"name","");
$job->print();
unlink $job->fh(1,"name");
}
}
::debug("init", "Cleaning\n");
cleanup();
if($Global::semaphore) {
$sem->release();
}
for(keys %Global::sshmaster) {
kill "TERM", $_;
}
::debug("init", "Halt\n");
if($opt::halt_on_error) {
wait_and_exit($Global::halt_on_error_exitstatus);
} else {
wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
}
sub __PIPE_MODE__ {}
sub pipe_part_files {
# Input:
# $file = the file to read
# Returns:
# @commands that will cat_partial each part
my ($file) = @_;
my $buf = "";
my $header = find_header(\$buf,open_or_exit($file));
# find positions
my @pos = find_split_positions($file,$opt::blocksize,length $header);
# Make @cat_partials
my @cat_partials = ();
for(my $i=0; $i<$#pos; $i++) {
push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]);
}
# Remote exec should look like:
# ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\)
# ssh -tt not allowed. Remote will die due to broken pipe anyway.
# TODO test remote with --fifo / --cat
return @cat_partials;
}
sub find_header {
# Input:
# $buf_ref = reference to read-in buffer
# $fh = filehandle to read from
# Uses:
# $opt::header
# $opt::blocksize
# Returns:
# $header string
my ($buf_ref, $fh) = @_;
my $header = "";
if($opt::header) {
if($opt::header eq ":") { $opt::header = "(.*\n)"; }
# Number = number of lines
$opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) {
if($$buf_ref=~s/^($opt::header)//) {
$header = $1;
last;
}
}
}
return $header;
}
sub find_split_positions {
# Input:
# $file = the file to read
# $block = (minimal) --block-size of each chunk
# $headerlen = length of header to be skipped
# Uses:
# $opt::recstart
# $opt::recend
# Returns:
# @positions of block start/end
my($file, $block, $headerlen) = @_;
my $size = -s $file;
$block = int $block;
# The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
# The optimal dd blocksize for freebsd = 2^15..2^17
my $dd_block_size = 131072; # 2^17
my @pos;
my ($recstart,$recend) = recstartrecend();
my $recendrecstart = $recend.$recstart;
my $fh = ::open_or_exit($file);
push(@pos,$headerlen);
for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
my $buf;
seek($fh, $pos, 0) || die;
while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
if($opt::regexp) {
# If match /$recend$recstart/ => Record position
if($buf =~ /(.*$recend)$recstart/os) {
my $i = length($1);
push(@pos,$pos+$i);
# Start looking for next record _after_ this match
$pos += $i;
last;
}
} else {
# If match $recend$recstart => Record position
my $i = index($buf,$recendrecstart);
if($i != -1) {
push(@pos,$pos+$i);
# Start looking for next record _after_ this match
$pos += $i;
last;
}
}
}
}
push(@pos,$size);
close $fh;
return @pos;
}
sub cat_partial {
# Input:
# $file = the file to read
# ($start, $end, [$start2, $end2, ...]) = start byte, end byte
# Returns:
# Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout
my($file, @start_end) = @_;
my($start, $i);
# Convert start_end to start_len
my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end;
return "<". shell_quote_scalar($file) .
q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } .
" @start_len";
}
sub spreadstdin {
# read a record
# Spawn a job and print the record to it.
# Uses:
# $opt::blocksize
# STDIN
# $opr::r
# $Global::max_lines
# $Global::max_number_of_args
# $opt::regexp
# $Global::start_no_new_jobs
# $opt::roundrobin
# %Global::running
my $buf = "";
my ($recstart,$recend) = recstartrecend();
my $recendrecstart = $recend.$recstart;
my $chunk_number = 1;
my $one_time_through;
my $blocksize = $opt::blocksize;
my $in = *STDIN;
my $header = find_header(\$buf,$in);
while(1) {
my $anything_written = 0;
if(not read($in,substr($buf,length $buf,0),$blocksize)) {
# End-of-file
$chunk_number != 1 and last;
# Force the while-loop once if everything was read by header reading
$one_time_through++ and last;
}
if($opt::r) {
# Remove empty lines
$buf =~ s/^\s*\n//gm;
if(length $buf == 0) {
next;
}
}
if($Global::max_lines and not $Global::max_number_of_args) {
# Read n-line records
my $n_lines = $buf =~ tr/\n/\n/;
my $last_newline_pos = rindex($buf,"\n");
while($n_lines % $Global::max_lines) {
$n_lines--;
$last_newline_pos = rindex($buf,"\n",$last_newline_pos-1);
}
# Chop at $last_newline_pos as that is where n-line record ends
$anything_written +=
write_record_to_pipe($chunk_number++,\$header,\$buf,
$recstart,$recend,$last_newline_pos+1);
substr($buf,0,$last_newline_pos+1) = "";
} elsif($opt::regexp) {
if($Global::max_number_of_args) {
# -N => (start..*?end){n}
# -L -N => (start..*?end){n*l}
my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1);
while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) {
# Copy to modifiable variable
my $b = $1;
$anything_written +=
write_record_to_pipe($chunk_number++,\$header,\$b,
$recstart,$recend,length $1);
}
} else {
# Find the last recend-recstart in $buf
if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) {
# Copy to modifiable variable
my $b = $1;
$anything_written +=
write_record_to_pipe($chunk_number++,\$header,\$b,
$recstart,$recend,length $1);
}
}
} else {
if($Global::max_number_of_args) {
# -N => (start..*?end){n}
my $i = 0;
my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1);
while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) {
$i += length $recend; # find the actual splitting location
$anything_written +=
write_record_to_pipe($chunk_number++,\$header,\$buf,
$recstart,$recend,$i);
substr($buf,0,$i) = "";
}
} else {
# Find the last recend-recstart in $buf
my $i = rindex($buf,$recendrecstart);
if($i != -1) {
$i += length $recend; # find the actual splitting location
$anything_written +=
write_record_to_pipe($chunk_number++,\$header,\$buf,
$recstart,$recend,$i);
substr($buf,0,$i) = "";
}
}
}
if(not $anything_written and not eof($in)) {
# Nothing was written - maybe the block size < record size?
# Increase blocksize exponentially
my $old_blocksize = $blocksize;
$blocksize = ceil($blocksize * 1.3 + 1);
::warning("A record was longer than $old_blocksize. " .
"Increasing to --blocksize $blocksize\n");
}
}
::debug("init", "Done reading input\n");
# If there is anything left in the buffer write it
substr($buf,0,0) = "";
write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf);
$Global::start_no_new_jobs ||= 1;
if($opt::roundrobin) {
for my $job (values %Global::running) {
close $job->fh(0,"w");
}
my %incomplete_jobs = %Global::running;
my $sleep = 1;
while(keys %incomplete_jobs) {
my $something_written = 0;
for my $pid (keys %incomplete_jobs) {
my $job = $incomplete_jobs{$pid};
if($job->stdin_buffer_length()) {
$something_written += $job->non_block_write();
} else {
delete $incomplete_jobs{$pid}
}
}
if($something_written) {
$sleep = $sleep/2+0.001;
}
$sleep = ::reap_usleep($sleep);
}
}
}
sub recstartrecend {
# Uses:
# $opt::recstart
# $opt::recend
# Returns:
# $recstart,$recend with default values and regexp conversion
my($recstart,$recend);
if(defined($opt::recstart) and defined($opt::recend)) {
# If both --recstart and --recend is given then both must match
$recstart = $opt::recstart;
$recend = $opt::recend;
} elsif(defined($opt::recstart)) {
# If --recstart is given it must match start of record
$recstart = $opt::recstart;
$recend = "";
} elsif(defined($opt::recend)) {
# If --recend is given then it must match end of record
$recstart = "";
$recend = $opt::recend;
}
if($opt::regexp) {
# If $recstart/$recend contains '|' this should only apply to the regexp
$recstart = "(?:".$recstart.")";
$recend = "(?:".$recend.")";
} else {
# $recstart/$recend = printf strings (\n)
$recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
$recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
}
return ($recstart,$recend);
}
sub nindex {
# See if string is in buffer N times
# Returns:
# the position where the Nth copy is found
my ($buf_ref, $str, $n) = @_;
my $i = 0;
for(1..$n) {
$i = index($$buf_ref,$str,$i+1);
if($i == -1) { last }
}
return $i;
}
{
my @robin_queue;
sub round_robin_write {
# Input:
# $header_ref = ref to $header string
# $block_ref = ref to $block to be written
# $recstart = record start string
# $recend = record end string
# $endpos = end position of $block
# Uses:
# %Global::running
my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_;
my $something_written = 0;
my $block_passed = 0;
my $sleep = 1;
while(not $block_passed) {
# Continue flushing existing buffers
# until one is empty and a new block is passed
# Make a queue to spread the blocks evenly
if(not @robin_queue) {
push @robin_queue, values %Global::running;
}
while(my $job = shift @robin_queue) {
if($job->stdin_buffer_length() > 0) {
$something_written += $job->non_block_write();
} else {
$job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend);
$block_passed = 1;
$job->set_virgin(0);
$something_written += $job->non_block_write();
last;
}
}
$sleep = ::reap_usleep($sleep);
}
return $something_written;
}
}
sub write_record_to_pipe {
# Fork then
# Write record from pos 0 .. $endpos to pipe
# Input:
# $chunk_number = sequence number - to see if already run
# $header_ref = reference to header string to prepend
# $record_ref = reference to record to write
# $recstart = start string of record
# $recend = end string of record
# $endpos = position in $record_ref where record ends
# Uses:
# $Global::job_already_run
# $opt::roundrobin
# @Global::virgin_jobs
# Returns:
# Number of chunks written (0 or 1)
my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_;
if($endpos == 0) { return 0; }
if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
if($opt::roundrobin) {
return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos);
}
# If no virgin found, backoff
my $sleep = 0.0001; # 0.01 ms - better performance on highend
while(not @Global::virgin_jobs) {
::debug("pipe", "No virgin jobs");
$sleep = ::reap_usleep($sleep);
# Jobs may not be started because of loadavg
# or too little time between each ssh login.
start_more_jobs();
}
my $job = shift @Global::virgin_jobs;
# Job is no longer virgin
$job->set_virgin(0);
if(fork()) {
# Skip
} else {
# Chop of at $endpos as we do not know how many rec_sep will
# be removed.
substr($$record_ref,$endpos,length $$record_ref) = "";
# Remove rec_sep
if($opt::remove_rec_sep) {
Job::remove_rec_sep($record_ref,$recstart,$recend);
}
$job->write($header_ref);
$job->write($record_ref);
close $job->fh(0,"w");
exit(0);
}
close $job->fh(0,"w");
return 1;
}
sub __SEM_MODE__ {}
sub acquire_semaphore {
# Acquires semaphore. If needed: spawns to the background
# Uses:
# @Global::host
# Returns:
# The semaphore to be released when jobs is complete
$Global::host{':'} = SSHLogin->new(":");
my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running());
$sem->acquire();
if($Semaphore::fg) {
# skip
} else {
# If run in the background, the PID will change
# therefore release and re-acquire the semaphore
$sem->release();
if(fork()) {
exit(0);
} else {
# child
# Get a semaphore for this pid
::die_bug("Can't start a new session: $!") if setsid() == -1;
$sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running());
$sem->acquire();
}
}
return $sem;
}
sub __PARSE_OPTIONS__ {}
sub options_hash {
# Returns:
# %hash = the GetOptions config
return
("debug|D=s" => \$opt::D,
"xargs" => \$opt::xargs,
"m" => \$opt::m,
"X" => \$opt::X,
"v" => \@opt::v,
"joblog=s" => \$opt::joblog,
"results|result|res=s" => \$opt::results,
"resume" => \$opt::resume,
"resume-failed|resumefailed" => \$opt::resume_failed,
"silent" => \$opt::silent,
#"silent-error|silenterror" => \$opt::silent_error,
"keep-order|keeporder|k" => \$opt::keeporder,
"group" => \$opt::group,
"g" => \$opt::retired,
"ungroup|u" => \$opt::ungroup,
"linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer,
"tmux" => \$opt::tmux,
"null|0" => \$opt::0,
"quote|q" => \$opt::q,
# Replacement strings
"parens=s" => \$opt::parens,
"rpl=s" => \@opt::rpl,
"plus" => \$opt::plus,
"I=s" => \$opt::I,
"extensionreplace|er=s" => \$opt::U,
"U=s" => \$opt::retired,
"basenamereplace|bnr=s" => \$opt::basenamereplace,
"dirnamereplace|dnr=s" => \$opt::dirnamereplace,
"basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
"seqreplace=s" => \$opt::seqreplace,
"slotreplace=s" => \$opt::slotreplace,
"jobs|j=s" => \$opt::jobs,
"delay=f" => \$opt::delay,
"sshdelay=f" => \$opt::sshdelay,
"load=s" => \$opt::load,
"noswap" => \$opt::noswap,
"max-line-length-allowed" => \$opt::max_line_length_allowed,
"number-of-cpus" => \$opt::number_of_cpus,
"number-of-cores" => \$opt::number_of_cores,
"use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
"shellquote|shell_quote|shell-quote" => \$opt::shellquote,
"nice=i" => \$opt::nice,
"timeout=s" => \$opt::timeout,
"tag" => \$opt::tag,
"tagstring|tag-string=s" => \$opt::tagstring,
"onall" => \$opt::onall,
"nonall" => \$opt::nonall,
"filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
"sshlogin|S=s" => \@opt::sshlogin,
"sshloginfile|slf=s" => \@opt::sshloginfile,
"controlmaster|M" => \$opt::controlmaster,
"return=s" => \@opt::return,
"trc=s" => \@opt::trc,
"transfer" => \$opt::transfer,
"cleanup" => \$opt::cleanup,
"basefile|bf=s" => \@opt::basefile,
"B=s" => \$opt::retired,
"ctrlc|ctrl-c" => \$opt::ctrlc,
"noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc,
"workdir|work-dir|wd=s" => \$opt::workdir,
"W=s" => \$opt::retired,
"tmpdir=s" => \$opt::tmpdir,
"tempdir=s" => \$opt::tmpdir,
"use-compress-program|compress-program=s" => \$opt::compress_program,
"use-decompress-program|decompress-program=s" => \$opt::decompress_program,
"compress" => \$opt::compress,
"tty" => \$opt::tty,
"T" => \$opt::retired,
"halt-on-error|halt=s" => \$opt::halt_on_error,
"H=i" => \$opt::retired,
"retries=i" => \$opt::retries,
"dry-run|dryrun" => \$opt::dryrun,
"progress" => \$opt::progress,
"eta" => \$opt::eta,
"bar" => \$opt::bar,
"arg-sep|argsep=s" => \$opt::arg_sep,
"arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
"trim=s" => \$opt::trim,
"env=s" => \@opt::env,
"recordenv|record-env" => \$opt::record_env,
"plain" => \$opt::plain,
"profile|J=s" => \@opt::profile,
"pipe|spreadstdin" => \$opt::pipe,
"robin|round-robin|roundrobin" => \$opt::roundrobin,
"recstart=s" => \$opt::recstart,
"recend=s" => \$opt::recend,
"regexp|regex" => \$opt::regexp,
"remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
"files|output-as-files|outputasfiles" => \$opt::files,
"block|block-size|blocksize=s" => \$opt::blocksize,
"tollef" => \$opt::retired,
"gnu" => \$opt::gnu,
"xapply" => \$opt::xapply,
"bibtex" => \$opt::bibtex,
"nn|nonotice|no-notice" => \$opt::no_notice,
# xargs-compatibility - implemented, man, testsuite
"max-procs|P=s" => \$opt::jobs,
"delimiter|d=s" => \$opt::d,
"max-chars|s=i" => \$opt::max_chars,
"arg-file|a=s" => \@opt::a,
"no-run-if-empty|r" => \$opt::r,
"replace|i:s" => \$opt::i,
"E=s" => \$opt::eof,
"eof|e:s" => \$opt::eof,
"max-args|n=i" => \$opt::max_args,
"max-replace-args|N=i" => \$opt::max_replace_args,
"colsep|col-sep|C=s" => \$opt::colsep,
"help|h" => \$opt::help,
"L=f" => \$opt::L,
"max-lines|l:f" => \$opt::max_lines,
"interactive|p" => \$opt::p,
"verbose|t" => \$opt::verbose,
"version|V" => \$opt::version,
"minversion|min-version=i" => \$opt::minversion,
"show-limits|showlimits" => \$opt::show_limits,
"exit|x" => \$opt::x,
# Semaphore
"semaphore" => \$opt::semaphore,
"semaphoretimeout=i" => \$opt::semaphoretimeout,
"semaphorename|id=s" => \$opt::semaphorename,
"fg" => \$opt::fg,
"bg" => \$opt::bg,
"wait" => \$opt::wait,
# Shebang #!/usr/bin/parallel --shebang
"shebang|hashbang" => \$opt::shebang,
"internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles,
"Y" => \$opt::retired,
"skip-first-line" => \$opt::skip_first_line,
"header=s" => \$opt::header,
"cat" => \$opt::cat,
"fifo" => \$opt::fifo,
"pipepart|pipe-part" => \$opt::pipepart,
"hgrp|hostgroup|hostgroups" => \$opt::hostgroups,
);
}
sub get_options_from_array {
# Run GetOptions on @array
# Input:
# $array_ref = ref to @ARGV to parse
# @keep_only = Keep only these options
# Uses:
# @ARGV
# Returns:
# true if parsing worked
# false if parsing failed
# @$array_ref is changed
my ($array_ref, @keep_only) = @_;
if(not @$array_ref) {
# Empty array: No need to look more at that
return 1;
}
# A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
# supported everywhere
my @save_argv;
my $this_is_ARGV = (\@::ARGV == $array_ref);
if(not $this_is_ARGV) {
@save_argv = @::ARGV;
@::ARGV = @{$array_ref};
}
# If @keep_only set: Ignore all values except @keep_only
my %options = options_hash();
if(@keep_only) {
my (%keep,@dummy);
@keep{@keep_only} = @keep_only;
for my $k (grep { not $keep{$_} } keys %options) {
# Store the value of the option in @dummy
$options{$k} = \@dummy;
}
}
my $retval = GetOptions(%options);
if(not $this_is_ARGV) {
@{$array_ref} = @::ARGV;
@::ARGV = @save_argv;
}
return $retval;
}
sub parse_options {
# Returns: N/A
# Defaults:
$Global::version = 20141122;
$Global::progname = 'parallel';
$Global::infinity = 2**31;
$Global::debug = 0;
$Global::verbose = 0;
$Global::quoting = 0;
# Read only table with default --rpl values
%Global::replace =
(
'{}' => '',
'{#}' => '1 $_=$job->seq()',
'{%}' => '1 $_=$job->slot()',
'{/}' => 's:.*/::',
'{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);',
'{/.}' => 's:.*/::; s:\.[^/.]+$::;',
'{.}' => 's:\.[^/.]+$::',
);
%Global::plus =
(
# {} = {+/}/{/}
# = {.}.{+.} = {+/}/{/.}.{+.}
# = {..}.{+..} = {+/}/{/..}.{+..}
# = {...}.{+...} = {+/}/{/...}.{+...}
'{+/}' => 's:/[^/]*$::',
'{+.}' => 's:.*\.::',
'{+..}' => 's:.*\.([^.]*\.):$1:',
'{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
'{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
'{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
'{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
'{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
);
# Modifiable copy of %Global::replace
%Global::rpl = %Global::replace;
$Global::parens = "{==}";
$/="\n";
$Global::ignore_empty = 0;
$Global::interactive = 0;
$Global::stderr_verbose = 0;
$Global::default_simultaneous_sshlogins = 9;
$Global::exitstatus = 0;
$Global::halt_on_error_exitstatus = 0;
$Global::arg_sep = ":::";
$Global::arg_file_sep = "::::";
$Global::trim = 'n';
$Global::max_jobs_running = 0;
$Global::job_already_run = '';
$ENV{'TMPDIR'} ||= "/tmp";
@ARGV=read_options();
if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
$Global::debug = $opt::D;
$Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh";
if(defined $opt::X) { $Global::ContextReplace = 1; }
if(defined $opt::silent) { $Global::verbose = 0; }
if(defined $opt::0) { $/ = "\0"; }
if(defined $opt::d) { my $e="sprintf \"$opt::d\""; $/ = eval $e; }
if(defined $opt::p) { $Global::interactive = $opt::p; }
if(defined $opt::q) { $Global::quoting = 1; }
if(defined $opt::r) { $Global::ignore_empty = 1; }
if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
# Deal with --rpl
sub rpl {
# Modify %Global::rpl
# Replace $old with $new
my ($old,$new) = @_;
if($old ne $new) {
$Global::rpl{$new} = $Global::rpl{$old};
delete $Global::rpl{$old};
}
}
if(defined $opt::parens) { $Global::parens = $opt::parens; }
my $parenslen = 0.5*length $Global::parens;
$Global::parensleft = substr($Global::parens,0,$parenslen);
$Global::parensright = substr($Global::parens,$parenslen);
if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
if(defined $opt::I) { rpl('{}',$opt::I); }
if(defined $opt::U) { rpl('{.}',$opt::U); }
if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
if(defined $opt::basenameextensionreplace) {
rpl('{/.}',$opt::basenameextensionreplace);
}
for(@opt::rpl) {
# Create $Global::rpl entries for --rpl options
# E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
my ($shorthand,$long) = split/ /,$_,2;
$Global::rpl{$shorthand} = $long;
}
if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; }
if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); }
if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
if(defined $opt::help) { die_usage(); }
if(defined $opt::colsep) { $Global::trim = 'lr'; }
if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; }
if(defined $opt::trim) { $Global::trim = $opt::trim; }
if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; }
if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); }
if(defined $opt::number_of_cores) {
print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
}
if(defined $opt::max_line_length_allowed) {
print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
}
if(defined $opt::version) { version(); wait_and_exit(0); }
if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); }
if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
if(defined $opt::show_limits) { show_limits(); }
if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
if(@opt::return) { push @Global::ret_files, @opt::return; }
if(not defined $opt::recstart and
not defined $opt::recend) { $opt::recend = "\n"; }
if(not defined $opt::blocksize) { $opt::blocksize = "1M"; }
$opt::blocksize = multiply_binary_prefix($opt::blocksize);
if(defined $opt::controlmaster) { $opt::noctrlc = 1; }
if(defined $opt::semaphore) { $Global::semaphore = 1; }
if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
if(defined $opt::semaphorename) { $Global::semaphore = 1; }
if(defined $opt::fg) { $Global::semaphore = 1; }
if(defined $opt::bg) { $Global::semaphore = 1; }
if(defined $opt::wait) { $Global::semaphore = 1; }
if(defined $opt::halt_on_error and
$opt::halt_on_error=~/%/) { $opt::halt_on_error /= 100; }
if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) {
::error("--timeout must be seconds or percentage\n");
wait_and_exit(255);
}
if(defined $opt::minversion) {
print $Global::version,"\n";
if($Global::version < $opt::minversion) {
wait_and_exit(255);
} else {
wait_and_exit(0);
}
}
if(not defined $opt::delay) {
# Set --delay to --sshdelay if not set
$opt::delay = $opt::sshdelay;
}
if($opt::compress_program) {
$opt::compress = 1;
$opt::decompress_program ||= $opt::compress_program." -dc";
}
if($opt::compress) {
my ($compress, $decompress) = find_compression_program();
$opt::compress_program ||= $compress;
$opt::decompress_program ||= $decompress;
}
if(defined $opt::nonall) {
# Append a dummy empty argument
push @ARGV, $Global::arg_sep, "";
}
if(defined $opt::tty) {
# Defaults for --tty: -j1 -u
# Can be overridden with -jXXX -g
if(not defined $opt::jobs) {
$opt::jobs = 1;
}
if(not defined $opt::group) {
$opt::ungroup = 0;
}
}
if(@opt::trc) {
push @Global::ret_files, @opt::trc;
$opt::transfer = 1;
$opt::cleanup = 1;
}
if(defined $opt::max_lines) {
if($opt::max_lines eq "-0") {
# -l -0 (swallowed -0)
$opt::max_lines = 1;
$opt::0 = 1;
$/ = "\0";
} elsif ($opt::max_lines == 0) {
# If not given (or if 0 is given) => 1
$opt::max_lines = 1;
}
$Global::max_lines = $opt::max_lines;
if(not $opt::pipe) {
# --pipe -L means length of record - not max_number_of_args
$Global::max_number_of_args ||= $Global::max_lines;
}
}
# Read more than one arg at a time (-L, -N)
if(defined $opt::L) {
$Global::max_lines = $opt::L;
if(not $opt::pipe) {
# --pipe -L means length of record - not max_number_of_args
$Global::max_number_of_args ||= $Global::max_lines;
}
}
if(defined $opt::max_replace_args) {
$Global::max_number_of_args = $opt::max_replace_args;
$Global::ContextReplace = 1;
}
if((defined $opt::L or defined $opt::max_replace_args)
and
not ($opt::xargs or $opt::m)) {
$Global::ContextReplace = 1;
}
if(defined $opt::tag and not defined $opt::tagstring) {
$opt::tagstring = "\257<\257>"; # Default = {}
}
if(defined $opt::pipepart and
(defined $opt::L or defined $opt::max_lines
or defined $opt::max_replace_args)) {
::error("--pipepart is incompatible with --max-replace-args, ",
"--max-lines, and -L.\n");
wait_and_exit(255);
}
if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) {
# Deal with ::: and ::::
@ARGV=read_args_from_command_line();
}
# Semaphore defaults
# Must be done before computing number of processes and max_line_length
# because when running as a semaphore GNU Parallel does not read args
$Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
if($Global::semaphore) {
# A semaphore does not take input from neither stdin nor file
@opt::a = ("/dev/null");
push(@Global::unget_argv, [Arg->new("")]);
$Semaphore::timeout = $opt::semaphoretimeout || 0;
if(defined $opt::semaphorename) {
$Semaphore::name = $opt::semaphorename;
} else {
$Semaphore::name = `tty`;
chomp $Semaphore::name;
}
$Semaphore::fg = $opt::fg;
$Semaphore::wait = $opt::wait;
$Global::default_simultaneous_sshlogins = 1;
if(not defined $opt::jobs) {
$opt::jobs = 1;
}
if($Global::interactive and $opt::bg) {
::error("Jobs running in the ".
"background cannot be interactive.\n");
::wait_and_exit(255);
}
}
if(defined $opt::eta) {
$opt::progress = $opt::eta;
}
if(defined $opt::bar) {
$opt::progress = $opt::bar;
}
if(defined $opt::retired) {
::error("-g has been retired. Use --group.\n");
::error("-B has been retired. Use --bf.\n");
::error("-T has been retired. Use --tty.\n");
::error("-U has been retired. Use --er.\n");
::error("-W has been retired. Use --wd.\n");
::error("-Y has been retired. Use --shebang.\n");
::error("-H has been retired. Use --halt.\n");
::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n");
::wait_and_exit(255);
}
citation_notice();
parse_sshlogin();
parse_env_var();
if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
# As we do not know the max line length on the remote machine
# long commands generated by xargs may fail
# If opt_N is set, it is probably safe
::warning("Using -X or -m with --sshlogin may fail.\n");
}
if(not defined $opt::jobs) {
$opt::jobs = "100%";
}
open_joblog();
}
sub env_quote {
# Input:
# $v = value to quote
# Returns:
# $v = value quoted as environment variable
my $v = $_[0];
$v =~ s/([\\])/\\$1/g;
$v =~ s/([\[\] \#\'\&\<\>\(\)\;\{\}\t\"\$\`\*\174\!\?\~])/\\$1/g;
$v =~ s/\n/"\n"/g;
return $v;
}
sub record_env {
# Record current %ENV-keys in ~/.parallel/ignored_vars
# Returns: N/A
my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars";
if(open(my $vars_fh, ">", $ignore_filename)) {
print $vars_fh map { $_,"\n" } keys %ENV;
} else {
::error("Cannot write to $ignore_filename\n");
::wait_and_exit(255);
}
}
sub parse_env_var {
# Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen
#
# Bash functions must be parsed to export them remotely
# Pre-shellshock style bash function:
# myfunc=() {...
# Post-shellshock style bash function:
# BASH_FUNC_myfunc()=() {...
#
# Uses:
# $Global::envvar = eval string that will set variables in both bash and csh
# $Global::envwarn = If functions are used: Give warning in csh
# $Global::envvarlen = length of $Global::envvar
# @opt::env
# $Global::shell
# %ENV
# Returns: N/A
$Global::envvar = "";
$Global::envwarn = "";
my @vars = ('parallel_bash_environment');
for my $varstring (@opt::env) {
# Split up --env VAR1,VAR2
push @vars, split /,/, $varstring;
}
if(grep { /^_$/ } @vars) {
# --env _
# Include all vars that are not in a clean environment
if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) {
my @ignore = <$vars_fh>;
chomp @ignore;
my %ignore;
@ignore{@ignore} = @ignore;
close $vars_fh;
push @vars, grep { not defined $ignore{$_} } keys %ENV;
@vars = grep { not /^_$/ } @vars;
} else {
::error("Run '$Global::progname --record-env' in a clean environment first.\n");
::wait_and_exit(255);
}
}
# Duplicate vars as BASH functions to include post-shellshock functions.
# So --env myfunc should also look for BASH_FUNC_myfunc()
@vars = map { $_, "BASH_FUNC_$_()" } @vars;
# Keep only defined variables
@vars = grep { defined($ENV{$_}) } @vars;
# Pre-shellshock style bash function:
# myfunc=() { echo myfunc
# }
# Post-shellshock style bash function:
# BASH_FUNC_myfunc()=() { echo myfunc
# }
my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars;
if(@bash_functions) {
# Functions are not supported for all shells
if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) {
::warning("Shell functions may not be supported in $Global::shell\n");
}
}
# Pre-shellschock names are without ()
my @bash_pre_shellshock = grep { not /\(\)/ } @bash_functions;
# Post-shellschock names are with ()
my @bash_post_shellshock = grep { /\(\)/ } @bash_functions;
my @qcsh = (map { my $a=$_; "setenv $a " . env_quote($ENV{$a}) }
grep { not /^parallel_bash_environment$/ } @non_functions);
my @qbash = (map { my $a=$_; "export $a=" . env_quote($ENV{$a}) }
@non_functions, @bash_pre_shellshock);
push @qbash, map { my $a=$_; "eval $a\"\$$a\"" } @bash_pre_shellshock;
push @qbash, map { /BASH_FUNC_(.*)\(\)/; "$1 $ENV{$_}" } @bash_post_shellshock;
#ssh -tt -oLogLevel=quiet lo 'eval `echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ BASH_FUNC_myfunc\ \\\(\\\)\\\ \\\{\\\ \\\ echo\\\ a\"'
#'\"\\\}\ \|\|\ myfunc\(\)\ \{\ \ echo\ a'
#'\}\ \;myfunc\ 1;
# Check if any variables contain \n
if(my @v = map { s/BASH_FUNC_(.*)\(\)/$1/; $_ } grep { $ENV{$_}=~/\n/ } @vars) {
# \n is bad for csh and will cause it to fail.
$Global::envwarn = ::shell_quote_scalar(q{echo $SHELL | egrep "/t?csh" > /dev/null && echo CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset }."@v".q{ && exec false;}."\n\n") . $Global::envwarn;
}
if(not @qcsh) { push @qcsh, "true"; }
if(not @qbash) { push @qbash, "true"; }
# Create lines like:
# echo $SHELL | grep "/t\\{0,1\\}csh" >/dev/null && setenv V1 val1 && setenv V2 val2 || export V1=val1 && export V2=val2 ; echo "$V1$V2"
if(@vars) {
$Global::envvar .=
join"",
(q{echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null && }
. join(" && ", @qcsh)
. q{ || }
. join(" && ", @qbash)
.q{;});
if($ENV{'parallel_bash_environment'}) {
$Global::envvar .= 'eval "$parallel_bash_environment";'."\n";
}
}
$Global::envvarlen = length $Global::envvar;
}
sub open_joblog {
# Open joblog as specified by --joblog
# Uses:
# $opt::resume
# $opt::resume_failed
# $opt::joblog
# $opt::results
# $Global::job_already_run
# %Global::fd
my $append = 0;
if(($opt::resume or $opt::resume_failed)
and
not ($opt::joblog or $opt::results)) {
::error("--resume and --resume-failed require --joblog or --results.\n");
::wait_and_exit(255);
}
if($opt::joblog) {
if($opt::resume || $opt::resume_failed) {
if(open(my $joblog_fh, "<", $opt::joblog)) {
# Read the joblog
$append = <$joblog_fh>; # If there is a header: Open as append later
my $joblog_regexp;
if($opt::resume_failed) {
# Make a regexp that only matches commands with exit+signal=0
# 4 host 1360490623.067 3.445 1023 1222 0 0 command
$joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
} else {
# Just match the job number
$joblog_regexp='^(\d+)';
}
while(<$joblog_fh>) {
if(/$joblog_regexp/o) {
# This is 30% faster than set_job_already_run($1);
vec($Global::job_already_run,($1||0),1) = 1;
} elsif(not /\d+\s+[^\s]+\s+([0-9.]+\s+){6}/) {
::error("Format of '$opt::joblog' is wrong: $_");
::wait_and_exit(255);
}
}
close $joblog_fh;
}
}
if($append) {
# Append to joblog
if(not open($Global::joblog, ">>", $opt::joblog)) {
::error("Cannot append to --joblog $opt::joblog.\n");
::wait_and_exit(255);
}
} else {
if($opt::joblog eq "-") {
# Use STDOUT as joblog
$Global::joblog = $Global::fd{1};
} elsif(not open($Global::joblog, ">", $opt::joblog)) {
# Overwrite the joblog
::error("Cannot write to --joblog $opt::joblog.\n");
::wait_and_exit(255);
}
print $Global::joblog
join("\t", "Seq", "Host", "Starttime", "JobRuntime",
"Send", "Receive", "Exitval", "Signal", "Command"
). "\n";
}
}
}
sub find_compression_program {
# Find a fast compression program
# Returns:
# $compress_program = compress program with options
# $decompress_program = decompress program with options
# Search for these. Sorted by speed
my @prg = qw(lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2);
for my $p (@prg) {
if(which($p)) {
return ("$p -c -1","$p -dc");
}
}
# Fall back to cat
return ("cat","cat");
}
sub read_options {
# Read options from command line, profile and $PARALLEL
# Uses:
# $opt::shebang_wrap
# $opt::shebang
# @ARGV
# $opt::plain
# @opt::profile
# $ENV{'HOME'}
# $ENV{'PARALLEL'}
# Returns:
# @ARGV_no_opt = @ARGV without --options
# This must be done first as this may exec myself
if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
$ARGV[0] =~ /^--shebang-?wrap/ or
$ARGV[0] =~ /^--hashbang/)) {
# Program is called from #! line in script
# remove --shebang-wrap if it is set
$opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
# remove --shebang if it is set
$opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
# remove --hashbang if it is set
$opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
if($opt::shebang) {
my $argfile = shell_quote_scalar(pop @ARGV);
# exec myself to split $ARGV[0] into separate fields
exec "$0 --skip-first-line -a $argfile @ARGV";
}
if($opt::shebang_wrap) {
my @options;
my @parser;
if ($^O eq 'freebsd') {
# FreeBSD's #! puts different values in @ARGV than Linux' does.
my @nooptions = @ARGV;
get_options_from_array(\@nooptions);
while($#ARGV > $#nooptions) {
push @options, shift @ARGV;
}
while(@ARGV and $ARGV[0] ne ":::") {
push @parser, shift @ARGV;
}
if(@ARGV and $ARGV[0] eq ":::") {
shift @ARGV;
}
} else {
@options = shift @ARGV;
}
my $script = shell_quote_scalar(shift @ARGV);
# exec myself to split $ARGV[0] into separate fields
exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV";
}
}
Getopt::Long::Configure("bundling","require_order");
my @ARGV_copy = @ARGV;
# Check if there is a --profile to set @opt::profile
get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
my @ARGV_profile = ();
my @ARGV_env = ();
if(not $opt::plain) {
# Add options from .parallel/config and other profiles
my @config_profiles = (
"/etc/parallel/config",
$ENV{'HOME'}."/.parallel/config",
$ENV{'HOME'}."/.parallelrc");
my @profiles = @config_profiles;
if(@opt::profile) {
# --profile overrides default profiles
@profiles = ();
for my $profile (@opt::profile) {
if(-r $profile) {
push @profiles, $profile;
} else {
push @profiles, $ENV{'HOME'}."/.parallel/".$profile;
}
}
}
for my $profile (@profiles) {
if(-r $profile) {
open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile");
while(<$in_fh>) {
/^\s*\#/ and next;
chomp;
push @ARGV_profile, shellwords($_);
}
close $in_fh;
} else {
if(grep /^$profile$/, @config_profiles) {
# config file is not required to exist
} else {
::error("$profile not readable.\n");
wait_and_exit(255);
}
}
}
# Add options from shell variable $PARALLEL
if($ENV{'PARALLEL'}) {
@ARGV_env = shellwords($ENV{'PARALLEL'});
}
}
Getopt::Long::Configure("bundling","require_order");
get_options_from_array(\@ARGV_profile) || die_usage();
get_options_from_array(\@ARGV_env) || die_usage();
get_options_from_array(\@ARGV) || die_usage();
# Prepend non-options to @ARGV (such as commands like 'nice')
unshift @ARGV, @ARGV_profile, @ARGV_env;
return @ARGV;
}
sub read_args_from_command_line {
# Arguments given on the command line after:
# ::: ($Global::arg_sep)
# :::: ($Global::arg_file_sep)
# Removes the arguments from @ARGV and:
# - puts filenames into -a
# - puts arguments into files and add the files to -a
# Input:
# @::ARGV = command option ::: arg arg arg :::: argfiles
# Uses:
# $Global::arg_sep
# $Global::arg_file_sep
# $opt::internal_pipe_means_argfiles
# $opt::pipe
# @opt::a
# Returns:
# @argv_no_argsep = @::ARGV without ::: and :::: and following args
my @new_argv = ();
for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
if($arg eq $Global::arg_sep
or
$arg eq $Global::arg_file_sep) {
my $group = $arg; # This group of arguments is args or argfiles
my @group;
while(defined ($arg = shift @ARGV)) {
if($arg eq $Global::arg_sep
or
$arg eq $Global::arg_file_sep) {
# exit while loop if finding new separator
last;
} else {
# If not hitting ::: or ::::
# Append it to the group
push @group, $arg;
}
}
if($group eq $Global::arg_file_sep
or ($opt::internal_pipe_means_argfiles and $opt::pipe)
) {
# Group of file names on the command line.
# Append args into -a
push @opt::a, @group;
} elsif($group eq $Global::arg_sep) {
# Group of arguments on the command line.
# Put them into a file.
# Create argfile
my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
unlink($name);
# Put args into argfile
print $outfh map { $_,$/ } @group;
seek $outfh, 0, 0;
# Append filehandle to -a
push @opt::a, $outfh;
} else {
::die_bug("Unknown command line group: $group");
}
if(defined($arg)) {
# $arg is ::: or ::::
redo;
} else {
# $arg is undef -> @ARGV empty
last;
}
}
push @new_argv, $arg;
}
# Output: @ARGV = command to run with options
return @new_argv;
}
sub cleanup {
# Returns: N/A
if(@opt::basefile) { cleanup_basefile(); }
}
sub __QUOTING_ARGUMENTS_FOR_SHELL__ {}
sub shell_quote {
# Input:
# @strings = strings to be quoted
# Output:
# @shell_quoted_strings = string quoted with \ as needed by the shell
my @strings = (@_);
for my $a (@strings) {
$a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
$a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \'
}
return wantarray ? @strings : "@strings";
}
sub shell_quote_empty {
# Inputs:
# @strings = strings to be quoted
# Returns:
# @quoted_strings = empty strings quoted as ''.
my @strings = shell_quote(@_);
for my $a (@strings) {
if($a eq "") {
$a = "''";
}
}
return wantarray ? @strings : "@strings";
}
sub shell_quote_scalar {
# Quote the string so shell will not expand any special chars
# Inputs:
# $string = string to be quoted
# Returns:
# $shell_quoted = string quoted with \ as needed by the shell
my $a = $_[0];
if(defined $a) {
# $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
# This is 1% faster than the above
$a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go;
$a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \'
}
return $a;
}
sub shell_quote_file {
# Quote the string so shell will not expand any special chars and prepend ./ if needed
# Input:
# $filename = filename to be shell quoted
# Returns:
# $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed
my $a = shell_quote_scalar(shift);
if(defined $a) {
if($a =~ m:^/: or $a =~ m:^\./:) {
# /abs/path or ./rel/path => skip
} else {
# rel/path => ./rel/path
$a = "./".$a;
}
}
return $a;
}
sub shellwords {
# Input:
# $string = shell line
# Returns:
# @shell_words = $string split into words as shell would do
$Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
return Text::ParseWords::shellwords(@_);
}
sub __FILEHANDLES__ {}
sub save_stdin_stdout_stderr {
# Remember the original STDIN, STDOUT and STDERR
# and file descriptors opened by the shell (e.g. 3>/tmp/foo)
# Uses:
# %Global::fd
# $Global::original_stderr
# $Global::original_stdin
# Returns: N/A
# Find file descriptors that are already opened (by the shell)
for my $fdno (1..61) {
# /dev/fd/62 and above are used by bash for <(cmd)
my $fh;
# 2-argument-open is used to be compatible with old perl 5.8.0
# bug #43570: Perl 5.8.0 creates 61 files
if(open($fh,">&=$fdno")) {
$Global::fd{$fdno}=$fh;
}
}
open $Global::original_stderr, ">&", "STDERR" or
::die_bug("Can't dup STDERR: $!");
open $Global::original_stdin, "<&", "STDIN" or
::die_bug("Can't dup STDIN: $!");
$Global::is_terminal = (-t $Global::original_stderr);
}
sub enough_file_handles {
# Check that we have enough filehandles available for starting
# another job
# Uses:
# $opt::ungroup
# %Global::fd
# Returns:
# 1 if ungrouped (thus not needing extra filehandles)
# 0 if too few filehandles
# 1 if enough filehandles
if(not $opt::ungroup) {
my %fh;
my $enough_filehandles = 1;
# perl uses 7 filehandles for something?
# open3 uses 2 extra filehandles temporarily
# We need a filehandle for each redirected file descriptor
# (normally just STDOUT and STDERR)
for my $i (1..(7+2+keys %Global::fd)) {
$enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
}
for (values %fh) { close $_; }
return $enough_filehandles;
} else {
# Ungrouped does not need extra file handles
return 1;
}
}
sub open_or_exit {
# Open a file name or exit if the file cannot be opened
# Inputs:
# $file = filehandle or filename to open
# Uses:
# $Global::stdin_in_opt_a
# $Global::original_stdin
# Returns:
# $fh = file handle to read-opened file
my $file = shift;
if($file eq "-") {
$Global::stdin_in_opt_a = 1;
return ($Global::original_stdin || *STDIN);
}
if(ref $file eq "GLOB") {
# This is an open filehandle
return $file;
}
my $fh = gensym;
if(not open($fh, "<", $file)) {
::error("Cannot open input file `$file': No such file or directory.\n");
wait_and_exit(255);
}
return $fh;
}
sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {}
# Variable structure:
#
# $Global::running{$pid} = Pointer to Job-object
# @Global::virgin_jobs = Pointer to Job-object that have received no input
# $Global::host{$sshlogin} = Pointer to SSHLogin-object
# $Global::total_running = total number of running jobs
# $Global::total_started = total jobs started
sub init_run_jobs {
$Global::total_running = 0;
$Global::total_started = 0;
$Global::tty_taken = 0;
$SIG{USR1} = \&list_running_jobs;
$SIG{USR2} = \&toggle_progress;
if(@opt::basefile) { setup_basefile(); }
}
{
my $last_time;
my %last_mtime;
sub start_more_jobs {
# Run start_another_job() but only if:
# * not $Global::start_no_new_jobs set
# * not JobQueue is empty
# * not load on server is too high
# * not server swapping
# * not too short time since last remote login
# Uses:
# $Global::max_procs_file
# $Global::max_procs_file_last_mod
# %Global::host
# @opt::sshloginfile
# $Global::start_no_new_jobs
# $opt::filter_hosts
# $Global::JobQueue
# $opt::pipe
# $opt::load
# $opt::noswap
# $opt::delay
# $Global::newest_starttime
# Returns:
# $jobs_started = number of jobs started
my $jobs_started = 0;
my $jobs_started_this_round = 0;
if($Global::start_no_new_jobs) {
return $jobs_started;
}
if(time - ($last_time||0) > 1) {
# At most do this every second
$last_time = time;
if($Global::max_procs_file) {
# --jobs filename
my $mtime = (stat($Global::max_procs_file))[9];
if($mtime > $Global::max_procs_file_last_mod) {
# file changed: Force re-computing max_jobs_running
$Global::max_procs_file_last_mod = $mtime;
for my $sshlogin (values %Global::host) {
$sshlogin->set_max_jobs_running(undef);
}
}
}
if(@opt::sshloginfile) {
# Is --sshloginfile changed?
for my $slf (@opt::sshloginfile) {
my $actual_file = expand_slf_shorthand($slf);
my $mtime = (stat($actual_file))[9];
$last_mtime{$actual_file} ||= $mtime;
if($mtime - $last_mtime{$actual_file} > 1) {
::debug("run","--sshloginfile $actual_file changed. reload\n");
$last_mtime{$actual_file} = $mtime;
# Reload $slf
# Empty sshlogins
@Global::sshlogin = ();
for (values %Global::host) {
# Don't start new jobs on any host
# except the ones added back later
$_->set_max_jobs_running(0);
}
# This will set max_jobs_running on the SSHlogins
read_sshloginfile($actual_file);
parse_sshlogin();
$opt::filter_hosts and filter_hosts();
setup_basefile();
}
}
}
}
do {
$jobs_started_this_round = 0;
# This will start 1 job on each --sshlogin (if possible)
# thus distribute the jobs on the --sshlogins round robin
for my $sshlogin (values %Global::host) {
if($Global::JobQueue->empty() and not $opt::pipe) {
# No more jobs in the queue
last;
}
debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
$sshlogin->jobs_running(), "\n");
if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
if($opt::load and $sshlogin->loadavg_too_high()) {
# The load is too high or unknown
next;
}
if($opt::noswap and $sshlogin->swapping()) {
# The server is swapping
next;
}
if($sshlogin->too_fast_remote_login()) {
# It has been too short since
next;
}
if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) {
# It has been too short since last start
next;
}
debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(),
" out of ", $sshlogin->max_jobs_running(),
" jobs running. Start another.\n");
if(start_another_job($sshlogin) == 0) {
# No more jobs to start on this $sshlogin
debug("run","No jobs started on ", $sshlogin->string(), "\n");
next;
}
$sshlogin->inc_jobs_running();
$sshlogin->set_last_login_at(::now());
$jobs_started++;
$jobs_started_this_round++;
}
debug("run","Running jobs after on ", $sshlogin->string(), ": ",
$sshlogin->jobs_running(), " of ",
$sshlogin->max_jobs_running(), "\n");
}
} while($jobs_started_this_round);
return $jobs_started;
}
}
{
my $no_more_file_handles_warned;
sub start_another_job {
# If there are enough filehandles
# and JobQueue not empty
# and not $job is in joblog
# Then grab a job from Global::JobQueue,
# start it at sshlogin
# mark it as virgin_job
# Inputs:
# $sshlogin = the SSHLogin to start the job on
# Uses:
# $Global::JobQueue
# $opt::pipe
# $opt::results
# $opt::resume
# @Global::virgin_jobs
# Returns:
# 1 if another jobs was started
# 0 otherwise
my $sshlogin = shift;
# Do we have enough file handles to start another job?
if(enough_file_handles()) {
if($Global::JobQueue->empty() and not $opt::pipe) {
# No more commands to run
debug("start", "Not starting: JobQueue empty\n");
return 0;
} else {
my $job;
# Skip jobs already in job log
# Skip jobs already in results
do {
$job = get_job_with_sshlogin($sshlogin);
if(not defined $job) {
# No command available for that sshlogin
debug("start", "Not starting: no jobs available for ",
$sshlogin->string(), "\n");
return 0;
}
} while ($job->is_already_in_joblog()
or
($opt::results and $opt::resume and $job->is_already_in_results()));
debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
$job->replaced(),"'\n");
if($job->start()) {
if($opt::pipe) {
push(@Global::virgin_jobs,$job);
}
debug("start", "Started as seq ", $job->seq(),
" pid:", $job->pid(), "\n");
return 1;
} else {
# Not enough processes to run the job.
# Put it back on the queue.
$Global::JobQueue->unget($job);
# Count down the number of jobs to run for this SSHLogin.
my $max = $sshlogin->max_jobs_running();
if($max > 1) { $max--; } else {
::error("No more processes: cannot run a single job. Something is wrong.\n");
::wait_and_exit(255);
}
$sshlogin->set_max_jobs_running($max);
# Sleep up to 300 ms to give other processes time to die
::usleep(rand()*300);
::warning("No more processes: ",
"Decreasing number of running jobs to $max. ",
"Raising ulimit -u or /etc/security/limits.conf may help.\n");
return 0;
}
}
} else {
# No more file handles
$no_more_file_handles_warned++ or
::warning("No more file handles. ",
"Raising ulimit -n or /etc/security/limits.conf may help.\n");
return 0;
}
}
}
$opt::min_progress_interval = 0;
sub init_progress {
# Uses:
# $opt::bar
# Returns:
# list of computers for progress output
$|=1;
if (not $Global::is_terminal) {
$opt::min_progress_interval = 30;
}
if($opt::bar) {
return("","");
}
my %progress = progress();
return ("\nComputers / CPU cores / Max jobs to run\n",
$progress{'workerlist'});
}
sub drain_job_queue {
# Uses:
# $opt::progress
# $Global::original_stderr
# $Global::total_running
# $Global::max_jobs_running
# %Global::running
# $Global::JobQueue
# %Global::host
# $Global::start_no_new_jobs
# Returns: N/A
if($opt::progress) {
print $Global::original_stderr init_progress();
}
my $last_header="";
my $sleep = 0.2;
my $last_left = 1000000000;
my $last_progress_time = 0;
my $ps_reported = 0;
do {
while($Global::total_running > 0) {
debug($Global::total_running, "==", scalar
keys %Global::running," slots: ", $Global::max_jobs_running);
if($opt::pipe) {
# When using --pipe sometimes file handles are not closed properly
for my $job (values %Global::running) {
close $job->fh(0,"w");
}
}
# When not connected to terminal, assume CI (e.g. CircleCI). In
# that case we want occasional progress output to prevent abort
# due to timeout with no output, but we also need to stop sending
# progress output if there has been no actual progress, so that
# the job can time out appropriately (CirecleCI: 10m) in case of
# a hung test. But without special output, it is extremely
# annoying to diagnose which test is hung, so we add that using
# `ps` below.
if($opt::progress and
($Global::is_terminal or (time() - $last_progress_time) >= 30)) {
my %progress = progress();
if($last_header ne $progress{'header'}) {
print $Global::original_stderr "\n", $progress{'header'}, "\n";
$last_header = $progress{'header'};
}
if ($Global::is_terminal) {
print $Global::original_stderr "\r",$progress{'status'};
}
if ($last_left > $Global::left) {
if (not $Global::is_terminal) {
print $Global::original_stderr $progress{'status'},"\n";
}
$last_progress_time = time();
$ps_reported = 0;
} elsif (not $ps_reported and (time() - $last_progress_time) >= 60) {
# No progress in at least 60 seconds: run ps
print $Global::original_stderr "\n";
system("ps", "-wf");
$ps_reported = 1;
}
$last_left = $Global::left;
flush $Global::original_stderr;
}
if($Global::total_running < $Global::max_jobs_running
and not $Global::JobQueue->empty()) {
# These jobs may not be started because of loadavg
# or too little time between each ssh login.
if(start_more_jobs() > 0) {
# Exponential back-on if jobs were started
$sleep = $sleep/2+0.001;
}
}
# Sometimes SIGCHLD is not registered, so force reaper
$sleep = ::reap_usleep($sleep);
}
if(not $Global::JobQueue->empty()) {
# These jobs may not be started:
# * because there the --filter-hosts has removed all
if(not %Global::host) {
::error("There are no hosts left to run on.\n");
::wait_and_exit(255);
}
# * because of loadavg
# * because of too little time between each ssh login.
start_more_jobs();
$sleep = ::reap_usleep($sleep);
if($Global::max_jobs_running == 0) {
::warning("There are no job slots available. Increase --jobs.\n");
}
}
} while ($Global::total_running > 0
or
not $Global::start_no_new_jobs and not $Global::JobQueue->empty());
if($opt::progress) {
my %progress = progress();
print $Global::original_stderr $opt::progress_sep, $progress{'status'}, "\n";
flush $Global::original_stderr;
}
}
sub toggle_progress {
# Turn on/off progress view
# Uses:
# $opt::progress
# $Global::original_stderr
# Returns: N/A
$opt::progress = not $opt::progress;
if($opt::progress) {
print $Global::original_stderr init_progress();
}
}
sub progress {
# Uses:
# $opt::bar
# $opt::eta
# %Global::host
# $Global::total_started
# Returns:
# $workerlist = list of workers
# $header = that will fit on the screen
# $status = message that will fit on the screen
if($opt::bar) {
return ("workerlist" => "", "header" => "", "status" => bar());
}
my $eta = "";
my ($status,$header)=("","");
if($opt::eta) {
my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
compute_eta();
$eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
$this_eta, $left, $avgtime);
$Global::left = $left;
}
my $termcols = terminal_columns();
my @workers = sort keys %Global::host;
my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers;
my $workerno = 1;
my %workerno = map { ($_=>$workerno++) } @workers;
my $workerlist = "";
for my $w (@workers) {
$workerlist .=
$workerno{$w}.":".$sshlogin{$w} ." / ".
($Global::host{$w}->ncpus() || "-")." / ".
$Global::host{$w}->max_jobs_running()."\n";
}
$status = "x"x($termcols+1);
if(length $status > $termcols) {
# sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
$header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
$status = $eta .
join(" ",map
{
if($Global::total_started) {
my $completed = ($Global::host{$_}->jobs_completed()||0);
my $running = $Global::host{$_}->jobs_running();
my $time = $completed ? (time-$^T)/($completed) : "0";
sprintf("%s:%d/%d/%d%%/%.1fs ",
$sshlogin{$_}, $running, $completed,
($running+$completed)*100
/ $Global::total_started, $time);
}
} @workers);
}
if(length $status > $termcols) {
# 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
$header = "Computer:jobs running/jobs completed/%of started jobs";
$status = $eta .
join(" ",map
{
my $completed = ($Global::host{$_}->jobs_completed()||0);
my $running = $Global::host{$_}->jobs_running();
my $time = $completed ? (time-$^T)/($completed) : "0";
sprintf("%s:%d/%d/%d%%/%.1fs ",
$workerno{$_}, $running, $completed,
($running+$completed)*100
/ $Global::total_started, $time);
} @workers);
}
if(length $status > $termcols) {
# sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
$header = "Computer:jobs running/jobs completed/%of started jobs";
$status = $eta .
join(" ",map
{ sprintf("%s:%d/%d/%d%%",
$sshlogin{$_},
$Global::host{$_}->jobs_running(),
($Global::host{$_}->jobs_completed()||0),
($Global::host{$_}->jobs_running()+
($Global::host{$_}->jobs_completed()||0))*100
/ $Global::total_started) }
@workers);
}
if(length $status > $termcols) {
# 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
$header = "Computer:jobs running/jobs completed/%of started jobs";
$status = $eta .
join(" ",map
{ sprintf("%s:%d/%d/%d%%",
$workerno{$_},
$Global::host{$_}->jobs_running(),
($Global::host{$_}->jobs_completed()||0),
($Global::host{$_}->jobs_running()+
($Global::host{$_}->jobs_completed()||0))*100
/ $Global::total_started) }
@workers);
}
if(length $status > $termcols) {
# sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
$header = "Computer:jobs running/jobs completed";
$status = $eta .
join(" ",map
{ sprintf("%s:%d/%d",
$sshlogin{$_}, $Global::host{$_}->jobs_running(),
($Global::host{$_}->jobs_completed()||0)) }
@workers);
}
if(length $status > $termcols) {
# sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
$header = "Computer:jobs running/jobs completed";
$status = $eta .
join(" ",map
{ sprintf("%s:%d/%d",
$sshlogin{$_}, $Global::host{$_}->jobs_running(),
($Global::host{$_}->jobs_completed()||0)) }
@workers);
}
if(length $status > $termcols) {
# 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
$header = "Computer:jobs running/jobs completed";
$status = $eta .
join(" ",map
{ sprintf("%s:%d/%d",
$workerno{$_}, $Global::host{$_}->jobs_running(),
($Global::host{$_}->jobs_completed()||0)) }
@workers);
}
if(length $status > $termcols) {
# sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
$header = "Computer:jobs completed";
$status = $eta .
join(" ",map
{ sprintf("%s:%d",
$sshlogin{$_},
($Global::host{$_}->jobs_completed()||0)) }
@workers);
}
if(length $status > $termcols) {
# 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
$header = "Computer:jobs completed";
$status = $eta .
join(" ",map
{ sprintf("%s:%d",
$workerno{$_},
($Global::host{$_}->jobs_completed()||0)) }
@workers);
}
return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
}
{
my ($total, $first_completed, $smoothed_avg_time);
sub compute_eta {
# Calculate important numbers for ETA
# Returns:
# $total = number of jobs in total
# $completed = number of jobs completed
# $left = number of jobs left
# $pctcomplete = percent of jobs completed
# $avgtime = averaged time
# $eta = smoothed eta
$total ||= $Global::JobQueue->total_jobs();
my $completed = 0;
for(values %Global::host) { $completed += $_->jobs_completed() }
my $left = $total - $completed;
if(not $completed) {
return($total, $completed, $left, 0, 0, 0);
}
my $pctcomplete = $completed / $total;
$first_completed ||= time;
my $timepassed = (time - $first_completed);
my $avgtime = $timepassed / $completed;
$smoothed_avg_time ||= $avgtime;
# Smooth the eta so it does not jump wildly
$smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
$pctcomplete * $avgtime;
my $eta = int($left * $smoothed_avg_time);
return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
}
}
{
my ($rev,$reset);
sub bar {
# Return:
# $status = bar with eta, completed jobs, arg and pct
$rev ||= "\033[7m";
$reset ||= "\033[0m";
my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
compute_eta();
my $arg = $Global::newest_job ?
$Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : "";
# These chars mess up display in the terminal
$arg =~ tr/[\011-\016\033\302-\365]//d;
my $bar_text =
sprintf("%d%% %d:%d=%ds %s",
$pctcomplete*100, $completed, $left, $eta, $arg);
my $terminal_width = terminal_columns();
my $s = sprintf("%-${terminal_width}s",
substr($bar_text." "x$terminal_width,
0,$terminal_width));
my $width = int($terminal_width * $pctcomplete);
substr($s,$width,0) = $reset;
my $zenity = sprintf("%-${terminal_width}s",
substr("# $eta sec $arg",
0,$terminal_width));
$s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
"\r" . $rev . $s . $reset;
return $s;
}
}
{
my ($columns,$last_column_time);
sub terminal_columns {
# Get the number of columns of the display
# Returns:
# number of columns of the screen
if(not $columns or $last_column_time < time) {
$last_column_time = time;
$columns = $ENV{'COLUMNS'};
if(not $columns) {
my $resize = qx{ resize 2>/dev/null };
$resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
}
$columns ||= 80;
}
return $columns;
}
}
sub get_job_with_sshlogin {
# Returns:
# next job object for $sshlogin if any available
my $sshlogin = shift;
my $job = undef;
if ($opt::hostgroups) {
my @other_hostgroup_jobs = ();
while($job = $Global::JobQueue->get()) {
if($sshlogin->in_hostgroups($job->hostgroups())) {
# Found a job for this hostgroup
last;
} else {
# This job was not in the hostgroups of $sshlogin
push @other_hostgroup_jobs, $job;
}
}
$Global::JobQueue->unget(@other_hostgroup_jobs);
if(not defined $job) {
# No more jobs
return undef;
}
} else {
$job = $Global::JobQueue->get();
if(not defined $job) {
# No more jobs
::debug("start", "No more jobs: JobQueue empty\n");
return undef;
}
}
my $clean_command = $job->replaced();
if($clean_command =~ /^\s*$/) {
# Do not run empty lines
if(not $Global::JobQueue->empty()) {
return get_job_with_sshlogin($sshlogin);
} else {
return undef;
}
}
$job->set_sshlogin($sshlogin);
if($opt::retries and $clean_command and
$job->failed_here()) {
# This command with these args failed for this sshlogin
my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
# Only look at the Global::host that have > 0 jobslots
if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host
and $job->failed_here() == $min_failures) {
# It failed the same or more times on another host:
# run it on this host
} else {
# If it failed fewer times on another host:
# Find another job to run
my $nextjob;
if(not $Global::JobQueue->empty()) {
# This can potentially recurse for all args
no warnings 'recursion';
$nextjob = get_job_with_sshlogin($sshlogin);
}
# Push the command back on the queue
$Global::JobQueue->unget($job);
return $nextjob;
}
}
return $job;
}
sub __REMOTE_SSH__ {}
sub read_sshloginfiles {
# Returns: N/A
for my $s (@_) {
read_sshloginfile(expand_slf_shorthand($s));
}
}
sub expand_slf_shorthand {
my $file = shift;
if($file eq "-") {
# skip: It is stdin
} elsif($file eq "..") {
$file = $ENV{'HOME'}."/.parallel/sshloginfile";
} elsif($file eq ".") {
$file = "/etc/parallel/sshloginfile";
} elsif(not -r $file) {
if(not -r $ENV{'HOME'}."/.parallel/".$file) {
# Try prepending ~/.parallel
::error("Cannot open $file.\n");
::wait_and_exit(255);
} else {
$file = $ENV{'HOME'}."/.parallel/".$file;
}
}
return $file;
}
sub read_sshloginfile {
# Returns: N/A
my $file = shift;
my $close = 1;
my $in_fh;
::debug("init","--slf ",$file);
if($file eq "-") {
$in_fh = *STDIN;
$close = 0;
} else {
if(not open($in_fh, "<", $file)) {
# Try the filename
::error("Cannot open $file.\n");
::wait_and_exit(255);
}
}
while(<$in_fh>) {
chomp;
/^\s*#/ and next;
/^\s*$/ and next;
push @Global::sshlogin, $_;
}
if($close) {
close $in_fh;
}
}
sub parse_sshlogin {
# Returns: N/A
my @login;
if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
for my $sshlogin (@Global::sshlogin) {
# Split up -S sshlogin,sshlogin
for my $s (split /,/, $sshlogin) {
if ($s eq ".." or $s eq "-") {
# This may add to @Global::sshlogin - possibly bug
read_sshloginfile(expand_slf_shorthand($s));
} else {
push (@login, $s);
}
}
}
$Global::minimal_command_line_length = 8_000_000;
my @allowed_hostgroups;
for my $ncpu_sshlogin_string (::uniq(@login)) {
my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
my $sshlogin_string = $sshlogin->string();
if($sshlogin_string eq "") {
# This is an ssh group: -S @webservers
push @allowed_hostgroups, $sshlogin->hostgroups();
next;
}
if($Global::host{$sshlogin_string}) {
# This sshlogin has already been added:
# It is probably a host that has come back
# Set the max_jobs_running back to the original
debug("run","Already seen $sshlogin_string\n");
if($sshlogin->{'ncpus'}) {
# If ncpus set by '#/' of the sshlogin, overwrite it:
$Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
}
$Global::host{$sshlogin_string}->set_max_jobs_running(undef);
next;
}
if($sshlogin_string eq ":") {
$sshlogin->set_maxlength(Limits::Command::max_length());
} else {
# If all chars needs to be quoted, every other character will be \
$sshlogin->set_maxlength(int(Limits::Command::max_length()/2));
}
$Global::minimal_command_line_length =
::min($Global::minimal_command_line_length, $sshlogin->maxlength());
$Global::host{$sshlogin_string} = $sshlogin;
}
if(@allowed_hostgroups) {
# Remove hosts that are not in these groups
while (my ($string, $sshlogin) = each %Global::host) {
if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
delete $Global::host{$string};
}
}
}
# debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) {
if(not remote_hosts()) {
# There are no remote hosts
if(@opt::trc) {
::warning("--trc ignored as there are no remote --sshlogin.\n");
} elsif (defined $opt::transfer) {
::warning("--transfer ignored as there are no remote --sshlogin.\n");
} elsif (@opt::return) {
::warning("--return ignored as there are no remote --sshlogin.\n");
} elsif (defined $opt::cleanup) {
::warning("--cleanup ignored as there are no remote --sshlogin.\n");
} elsif (@opt::basefile) {
::warning("--basefile ignored as there are no remote --sshlogin.\n");
}
}
}
}
sub remote_hosts {
# Return sshlogins that are not ':'
# Returns:
# list of sshlogins with ':' removed
return grep !/^:$/, keys %Global::host;
}
sub setup_basefile {
# Transfer basefiles to each $sshlogin
# This needs to be done before first jobs on $sshlogin is run
# Returns: N/A
my $cmd = "";
my $rsync_destdir;
my $workdir;
for my $sshlogin (values %Global::host) {
if($sshlogin->string() eq ":") { next }
for my $file (@opt::basefile) {
if($file !~ m:^/: and $opt::workdir eq "...") {
::error("Work dir '...' will not work with relative basefiles\n");
::wait_and_exit(255);
}
$workdir ||= Job->new("")->workdir();
$cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . "&";
}
}
$cmd .= "wait;";
debug("init", "basesetup: $cmd\n");
print `$cmd`;
}
sub cleanup_basefile {
# Remove the basefiles transferred
# Returns: N/A
my $cmd="";
my $workdir = Job->new("")->workdir();
for my $sshlogin (values %Global::host) {
if($sshlogin->string() eq ":") { next }
for my $file (@opt::basefile) {
$cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&";
}
}
$cmd .= "wait;";
debug("init", "basecleanup: $cmd\n");
print `$cmd`;
}
sub filter_hosts {
my(@cores, @cpus, @maxline, @echo);
my $envvar = ::shell_quote_scalar($Global::envvar);
while (my ($host, $sshlogin) = each %Global::host) {
if($host eq ":") { next }
# The 'true' is used to get the $host out later
my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin();
push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0");
push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0");
push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0");
# 'echo' is used to get the best possible value for an ssh login time
push(@echo, $host."\t".$sshcmd." echo\n\0");
}
my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh");
print $fh @cores, @cpus, @maxline, @echo;
close $fh;
# --timeout 5: Setting up an SSH connection and running a simple
# command should never take > 5 sec.
# --delay 0.1: If multiple sshlogins use the same proxy the delay
# will make it less likely to overload the ssh daemon.
# --retries 3: If the ssh daemon it overloaded, try 3 times
# -s 16000: Half of the max line on UnixWare
my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null";
::debug("init", $cmd, "\n");
open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts);
my $prepend = "";
while(<$host_fh>) {
if(/\'$/) {
# if last char = ' then append next line
# This may be due to quoting of $Global::envvar
$prepend .= $_;
next;
}
$_ = $prepend . $_;
$prepend = "";
chomp;
my @col = split /\t/, $_;
if(defined $col[6]) {
# This is a line from --joblog
# seq host time spent sent received exit signal command
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
if($col[0] eq "Seq" and $col[1] eq "Host" and
$col[2] eq "Starttime") {
# Header => skip
next;
}
# Get server from: eval true server\;
$col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]");
my $host = $1;
$host =~ tr/\\//d;
$Global::host{$host} or next;
if($col[6] eq "255" or $col[7] eq "15") {
# exit == 255 or signal == 15: ssh failed
# Remove sshlogin
::debug("init", "--filtered $host\n");
push(@down_hosts, $host);
@down_hosts = uniq(@down_hosts);
} elsif($col[6] eq "127") {
# signal == 127: parallel not installed remote
# Set ncpus and ncores = 1
::warning("Could not figure out ",
"number of cpus on $host. Using 1.\n");
$ncores{$host} = 1;
$ncpus{$host} = 1;
$maxlen{$host} = Limits::Command::max_length();
} elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
# Remember how log it took to log in
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
$time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
} else {
::die_bug("host check unmatched long jobline: $_");
}
} elsif($Global::host{$col[0]}) {
# This output from --number-of-cores, --number-of-cpus,
# --max-line-length-allowed
# ncores: server 8
# ncpus: server 2
# maxlen: server 131071
if(not $ncores{$col[0]}) {
$ncores{$col[0]} = $col[1];
} elsif(not $ncpus{$col[0]}) {
$ncpus{$col[0]} = $col[1];
} elsif(not $maxlen{$col[0]}) {
$maxlen{$col[0]} = $col[1];
} elsif(not $echo{$col[0]}) {
$echo{$col[0]} = $col[1];
} elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
# Skip these:
# perl: warning: Setting locale failed.
# perl: warning: Please check that your locale settings:
# LANGUAGE = (unset),
# LC_ALL = (unset),
# LANG = "en_US.UTF-8"
# are supported and installed on your system.
# perl: warning: Falling back to the standard locale ("C").
} else {
::die_bug("host check too many col0: $_");
}
} else {
::die_bug("host check unmatched short jobline ($col[0]): $_");
}
}
close $host_fh;
$Global::debug or unlink $tmpfile;
delete @Global::host{@down_hosts};
@down_hosts and ::warning("Removed @down_hosts\n");
$Global::minimal_command_line_length = 8_000_000;
while (my ($sshlogin, $obj) = each %Global::host) {
if($sshlogin eq ":") { next }
$ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin());
$ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin());
$time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin());
$maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin());
if($opt::use_cpus_instead_of_cores) {
$obj->set_ncpus($ncpus{$sshlogin});
} else {
$obj->set_ncpus($ncores{$sshlogin});
}
$obj->set_time_to_login($time_to_login{$sshlogin});
$obj->set_maxlength($maxlen{$sshlogin});
$Global::minimal_command_line_length =
::min($Global::minimal_command_line_length,
int($maxlen{$sshlogin}/2));
::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus{$sshlogin},
" ncores:", $ncores{$sshlogin},
" time_to_login:", $time_to_login{$sshlogin},
" maxlen:", $maxlen{$sshlogin},
" min_max_len:", $Global::minimal_command_line_length,"\n");
}
}
sub onall {
sub tmp_joblog {
my $joblog = shift;
if(not defined $joblog) {
return undef;
}
my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
close $fh;
return $tmpfile;
}
my @command = @_;
if($Global::quoting) {
@command = shell_quote_empty(@command);
}
# Copy all @fhlist into tempfiles
my @argfiles = ();
for my $fh (@fhlist) {
my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1);
print $outfh (<$fh>);
close $outfh;
push @argfiles, $name;
}
if(@opt::basefile) { setup_basefile(); }
# for each sshlogin do:
# parallel -S $sshlogin $command :::: @argfiles
#
# Pass some of the options to the sub-parallels, not all of them as
# -P should only go to the first, and -S should not be copied at all.
my $options =
join(" ",
((defined $opt::jobs) ? "-P $opt::jobs" : ""),
((defined $opt::linebuffer) ? "--linebuffer" : ""),
((defined $opt::ungroup) ? "-u" : ""),
((defined $opt::group) ? "-g" : ""),
((defined $opt::keeporder) ? "--keeporder" : ""),
((defined $opt::D) ? "-D $opt::D" : ""),
((defined $opt::plain) ? "--plain" : ""),
((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
);
my $suboptions =
join(" ",
((defined $opt::ungroup) ? "-u" : ""),
((defined $opt::linebuffer) ? "--linebuffer" : ""),
((defined $opt::group) ? "-g" : ""),
((defined $opt::files) ? "--files" : ""),
((defined $opt::keeporder) ? "--keeporder" : ""),
((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
((@opt::v) ? "-vv" : ""),
((defined $opt::D) ? "-D $opt::D" : ""),
((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
((defined $opt::plain) ? "--plain" : ""),
((defined $opt::retries) ? "--retries ".$opt::retries : ""),
((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
(@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""),
);
::debug("init", "| $0 $options\n");
open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") ||
::die_bug("This does not run GNU Parallel: $0 $options");
my @joblogs;
for my $host (sort keys %Global::host) {
my $sshlogin = $Global::host{$host};
my $joblog = tmp_joblog($opt::joblog);
if($joblog) {
push @joblogs, $joblog;
$joblog = "--joblog $joblog";
}
my $quad = $opt::arg_file_sep || "::::";
::debug("init", "$0 $suboptions -j1 $joblog ",
((defined $opt::tag) ?
"--tagstring ".shell_quote_scalar($sshlogin->string()) : ""),
" -S ", shell_quote_scalar($sshlogin->string())," ",
join(" ",shell_quote(@command))," $quad @argfiles\n");
print $parallel_fh "$0 $suboptions -j1 $joblog ",
((defined $opt::tag) ?
"--tagstring ".shell_quote_scalar($sshlogin->string()) : ""),
" -S ", shell_quote_scalar($sshlogin->string())," ",
join(" ",shell_quote(@command))," $quad @argfiles\n";
}
close $parallel_fh;
$Global::exitstatus = $? >> 8;
debug("init", "--onall exitvalue ", $?);
if(@opt::basefile) { cleanup_basefile(); }
$Global::debug or unlink(@argfiles);
my %seen;
for my $joblog (@joblogs) {
# Append to $joblog
open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
# Skip first line (header);
<$fh>;
print $Global::joblog (<$fh>);
close $fh;
unlink($joblog);
}
}
sub __SIGNAL_HANDLING__ {}
sub save_original_signal_handler {
# Remember the original signal handler
# Returns: N/A
$SIG{TERM} ||= sub { exit 0; }; # $SIG{TERM} is not set on Mac OS X
$SIG{INT} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; }
unlink keys %Global::unlink; exit -1 };
$SIG{TERM} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; }
unlink keys %Global::unlink; exit -1 };
%Global::original_sig = %SIG;
$SIG{TERM} = sub {}; # Dummy until jobs really start
}
sub list_running_jobs {
# Returns: N/A
for my $v (values %Global::running) {
print $Global::original_stderr "$Global::progname: ",$v->replaced(),"\n";
}
}
sub start_no_new_jobs {
# Returns: N/A
$SIG{TERM} = $Global::original_sig{TERM};
print $Global::original_stderr
("$Global::progname: SIGTERM received. No new jobs will be started.\n",
"$Global::progname: Waiting for these ", scalar(keys %Global::running),
" jobs to finish. Send SIGTERM again to stop now.\n");
list_running_jobs();
$Global::start_no_new_jobs ||= 1;
}
sub reaper {
# A job finished.
# Print the output.
# Start another job
# Returns: N/A
my $stiff;
my $children_reaped = 0;
debug("run", "Reaper ");
while (($stiff = waitpid(-1, &WNOHANG)) > 0) {
$children_reaped++;
if($Global::sshmaster{$stiff}) {
# This is one of the ssh -M: ignore
next;
}
my $job = $Global::running{$stiff};
# '-a <(seq 10)' will give us a pid not in %Global::running
$job or next;
$job->set_exitstatus($? >> 8);
$job->set_exitsignal($? & 127);
debug("run", "died (", $job->exitstatus(), "): ", $job->seq());
$job->set_endtime(::now());
if($stiff == $Global::tty_taken) {
# The process that died had the tty => release it
$Global::tty_taken = 0;
}
if(not $job->should_be_retried()) {
# The job is done
# Free the jobslot
push @Global::slots, $job->slot();
if($opt::timeout) {
# Update average runtime for timeout
$Global::timeoutq->update_delta_time($job->runtime());
}
# Force printing now if the job failed and we are going to exit
my $print_now = ($opt::halt_on_error and $opt::halt_on_error == 2
and $job->exitstatus());
if($opt::keeporder and not $print_now) {
print_earlier_jobs($job);
} else {
$job->print();
}
if($job->exitstatus()) {
process_failed_job($job);
}
}
my $sshlogin = $job->sshlogin();
$sshlogin->dec_jobs_running();
$sshlogin->inc_jobs_completed();
$Global::total_running--;
delete $Global::running{$stiff};
start_more_jobs();
}
debug("run", "done ");
return $children_reaped;
}
sub process_failed_job {
# The jobs had a exit status <> 0, so error
# Returns: N/A
my $job = shift;
$Global::exitstatus++;
$Global::total_failed++;
if($opt::halt_on_error) {
if($opt::halt_on_error == 1
or
($opt::halt_on_error < 1 and $Global::total_failed > 3
and
$Global::total_failed / $Global::total_started > $opt::halt_on_error)) {
# If halt on error == 1 or --halt 10%
# we should gracefully exit
print $Global::original_stderr
("$Global::progname: Starting no more jobs. ",
"Waiting for ", scalar(keys %Global::running),
" jobs to finish. This job failed:\n",
$job->replaced(),"\n");
$Global::start_no_new_jobs ||= 1;
$Global::halt_on_error_exitstatus = $job->exitstatus();
} elsif($opt::halt_on_error == 2) {
# If halt on error == 2 we should exit immediately
print $Global::original_stderr
("$Global::progname: This job failed:\n",
$job->replaced(),"\n");
exit ($job->exitstatus());
}
}
}
{
my (%print_later,$job_end_sequence);
sub print_earlier_jobs {
# Print jobs completed earlier
# Returns: N/A
my $job = shift;
$print_later{$job->seq()} = $job;
$job_end_sequence ||= 1;
debug("run", "Looking for: $job_end_sequence ",
"Current: ", $job->seq(), "\n");
for(my $j = $print_later{$job_end_sequence};
$j or vec($Global::job_already_run,$job_end_sequence,1);
$job_end_sequence++,
$j = $print_later{$job_end_sequence}) {
debug("run", "Found job end $job_end_sequence");
if($j) {
$j->print();
delete $print_later{$job_end_sequence};
}
}
}
}
sub __USAGE__ {}
sub wait_and_exit {
# If we do not wait, we sometimes get segfault
# Returns: N/A
my $error = shift;
if($error) {
# Kill all without printing
for my $job (values %Global::running) {
$job->kill("TERM");
$job->kill("TERM");
}
}
for (keys %Global::unkilled_children) {
kill 9, $_;
waitpid($_,0);
delete $Global::unkilled_children{$_};
}
wait();
exit($error);
}
sub die_usage {
# Returns: N/A
usage();
wait_and_exit(255);
}
sub usage {
# Returns: N/A
print join
("\n",
"Usage:",
"",
"$Global::progname [options] [command [arguments]] < list_of_arguments",
"$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
"cat ... | $Global::progname --pipe [options] [command [arguments]]",
"",
"-j n Run n jobs in parallel",
"-k Keep same order",
"-X Multiple arguments with context replace",
"--colsep regexp Split input on regexp for positional replacements",
"{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
"{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
"With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
" {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
"",
"-S sshlogin Example: foo\@server.example.com",
"--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
"--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
"--onall Run the given command with argument on all sshlogins",
"--nonall Run the given command with no arguments on all sshlogins",
"",
"--pipe Split stdin (standard input) to multiple jobs.",
"--recend str Record end separator for --pipe.",
"--recstart str Record start separator for --pipe.",
"",
"See 'man $Global::progname' for details",
"",
"When using programs that use GNU Parallel to process data for publication please cite:",
"",
"O. Tange (2011): GNU Parallel - The Command-Line Power Tool,",
";login: The USENIX Magazine, February 2011:42-47.",
"",
"Or you can get GNU Parallel without this requirement by paying 10000 EUR.",
"");
}
sub citation_notice {
# if --no-notice or --plain: do nothing
# if stderr redirected: do nothing
# if ~/.parallel/will-cite: do nothing
# else: print citation notice to stderr
if($opt::no_notice
or
$opt::plain
or
not -t $Global::original_stderr
or
-e $ENV{'HOME'}."/.parallel/will-cite") {
# skip
} else {
print $Global::original_stderr
("When using programs that use GNU Parallel to process data for publication please cite:\n",
"\n",
" O. Tange (2011): GNU Parallel - The Command-Line Power Tool,\n",
" ;login: The USENIX Magazine, February 2011:42-47.\n",
"\n",
"This helps funding further development; and it won't cost you a cent.\n",
"Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n",
"\n",
"To silence this citation notice run 'parallel --bibtex' once or use '--no-notice'.\n\n",
);
flush $Global::original_stderr;
}
}
sub warning {
my @w = @_;
my $fh = $Global::original_stderr || *STDERR;
my $prog = $Global::progname || "parallel";
print $fh $prog, ": Warning: ", @w;
}
sub error {
my @w = @_;
my $fh = $Global::original_stderr || *STDERR;
my $prog = $Global::progname || "parallel";
print $fh $prog, ": Error: ", @w;
}
sub die_bug {
my $bugid = shift;
print STDERR
("$Global::progname: This should not happen. You have found a bug.\n",
"Please contact <parallel\@gnu.org> and include:\n",
"* The version number: $Global::version\n",
"* The bugid: $bugid\n",
"* The command line being run\n",
"* The files being read (put the files on a webserver if they are big)\n",
"\n",
"If you get the error on smaller/fewer files, please include those instead.\n");
::wait_and_exit(255);
}
sub version {
# Returns: N/A
if($opt::tollef and not $opt::gnu) {
print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n";
}
print join("\n",
"GNU $Global::progname $Global::version",
"Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and Free Software Foundation, Inc.",
"License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
"This is free software: you are free to change and redistribute it.",
"GNU $Global::progname comes with no warranty.",
"",
"Web site: http://www.gnu.org/software/${Global::progname}\n",
"When using programs that use GNU Parallel to process data for publication please cite:\n",
"O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ",
";login: The USENIX Magazine, February 2011:42-47.\n",
"Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n",
);
}
sub bibtex {
# Returns: N/A
if($opt::tollef and not $opt::gnu) {
print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n";
}
print join("\n",
"When using programs that use GNU Parallel to process data for publication please cite:",
"",
"\@article{Tange2011a,",
" title = {GNU Parallel - The Command-Line Power Tool},",
" author = {O. Tange},",
" address = {Frederiksberg, Denmark},",
" journal = {;login: The USENIX Magazine},",
" month = {Feb},",
" number = {1},",
" volume = {36},",
" url = {http://www.gnu.org/s/parallel},",
" year = {2011},",
" pages = {42-47}",
"}",
"",
"(Feel free to use \\nocite{Tange2011a})",
"",
"This helps funding further development.",
"",
"Or you can get GNU Parallel without this requirement by paying 10000 EUR.",
""
);
while(not -e $ENV{'HOME'}."/.parallel/will-cite") {
print "\nType: 'will cite' and press enter.\n> ";
my $input = <STDIN>;
if($input =~ /will cite/i) {
mkdir $ENV{'HOME'}."/.parallel";
open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite")
|| ::die_bug("Cannot write: ".$ENV{'HOME'}."/.parallel/will-cite");
close $fh;
print "\nThank you for your support. It is much appreciated. The citation\n",
"notice is now silenced.\n";
}
}
}
sub show_limits {
# Returns: N/A
print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
"Maximal used size of command: ",Limits::Command::max_length(),"\n",
"\n",
"Execution of will continue now, and it will try to read its input\n",
"and run commands; if this is not what you wanted to happen, please\n",
"press CTRL-D or CTRL-C\n");
}
sub __GENERIC_COMMON_FUNCTION__ {}
sub uniq {
# Remove duplicates and return unique values
return keys %{{ map { $_ => 1 } @_ }};
}
sub min {
# Returns:
# Minimum value of array
my $min;
for (@_) {
# Skip undefs
defined $_ or next;
defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
$min = ($min < $_) ? $min : $_;
}
return $min;
}
sub max {
# Returns:
# Maximum value of array
my $max;
for (@_) {
# Skip undefs
defined $_ or next;
defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
$max = ($max > $_) ? $max : $_;
}
return $max;
}
sub sum {
# Returns:
# Sum of values of array
my @args = @_;
my $sum = 0;
for (@args) {
# Skip undefs
$_ and do { $sum += $_; }
}
return $sum;
}
sub undef_as_zero {
my $a = shift;
return $a ? $a : 0;
}
sub undef_as_empty {
my $a = shift;
return $a ? $a : "";
}
{
my $hostname;
sub hostname {
if(not $hostname) {
$hostname = `hostname`;
chomp($hostname);
$hostname ||= "nohostname";
}
return $hostname;
}
}
sub which {
# Input:
# @programs = programs to find the path to
# Returns:
# @full_path = full paths to @programs. Nothing if not found
my @which;
for my $prg (@_) {
push @which, map { $_."/".$prg } grep { -x $_."/".$prg } split(":",$ENV{'PATH'});
}
return @which;
}
{
my ($regexp,%fakename);
sub parent_shell {
# Input:
# $pid = pid to see if (grand)*parent is a shell
# Returns:
# $shellpath = path to shell - undef if no shell found
my $pid = shift;
if(not $regexp) {
# All shells known to mankind
#
# ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
# posh rbash rush rzsh sash sh static-sh tcsh yash zsh
my @shells = qw(ash bash csh dash fdsh fish fizsh ksh
ksh93 mksh pdksh posh rbash rush rzsh
sash sh static-sh tcsh yash zsh -sh -csh);
# Can be formatted as:
# [sh] -sh sh busybox sh
# /bin/sh /sbin/sh /opt/csw/sh
# NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh
my $shell = "(?:".join("|",@shells).")";
$regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| )';
%fakename = (
# csh and tcsh disguise themselves as -sh/-csh
"-sh" => ["csh", "tcsh"],
"-csh" => ["tcsh", "csh"],
);
}
my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
my $shellpath;
my $testpid = $pid;
while($testpid) {
::debug("init", "shell? ". $name_of_ref->{$testpid}."\n");
if($name_of_ref->{$testpid} =~ /$regexp/o) {
::debug("init", "which ".($3||$6)." => ");
$shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0];
::debug("init", "shell path $shellpath\n");
$shellpath and last;
}
$testpid = $parent_of_ref->{$testpid};
}
return $shellpath;
}
}
{
my %pid_parentpid_cmd;
sub pid_table {
# Returns:
# %children_of = { pid -> children of pid }
# %parent_of = { pid -> pid of parent }
# %name_of = { pid -> commandname }
if(not %pid_parentpid_cmd) {
# Filter for SysV-style `ps`
my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
q(s/^.{$s}//; print "@F[1,2] $_"' );
# BSD-style `ps`
my $bsd = q(ps -o pid,ppid,command -ax);
%pid_parentpid_cmd =
(
'aix' => $sysv,
'cygwin' => $sysv,
'msys' => $sysv,
'dec_osf' => $sysv,
'darwin' => $bsd,
'dragonfly' => $bsd,
'freebsd' => $bsd,
'gnu' => $sysv,
'hpux' => $sysv,
'linux' => $sysv,
'mirbsd' => $bsd,
'netbsd' => $bsd,
'nto' => $sysv,
'openbsd' => $bsd,
'solaris' => $sysv,
'svr5' => $sysv,
);
}
$pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
my (@pidtable,%parent_of,%children_of,%name_of);
# Table with pid -> children of pid
@pidtable = `$pid_parentpid_cmd{$^O}`;
my $p=$$;
for (@pidtable) {
# must match: 24436 21224 busybox ash
/(\S+)\s+(\S+)\s+(\S+.*)/ or ::die_bug("pidtable format: $_");
$parent_of{$1} = $2;
push @{$children_of{$2}}, $1;
$name_of{$1} = $3;
}
return(\%children_of, \%parent_of, \%name_of);
}
}
sub reap_usleep {
# Reap dead children.
# If no dead children: Sleep specified amount with exponential backoff
# Input:
# $ms = milliseconds to sleep
# Returns:
# $ms/2+0.001 if children reaped
# $ms*1.1 if no children reaped
my $ms = shift;
if(reaper()) {
# Sleep exponentially shorter (1/2^n) if a job finished
return $ms/2+0.001;
} else {
if($opt::timeout) {
$Global::timeoutq->process_timeouts();
}
usleep($ms);
Job::exit_if_disk_full();
if($opt::linebuffer) {
for my $job (values %Global::running) {
$job->print();
}
}
# Sleep exponentially longer (1.1^n) if a job did not finish
# though at most 1000 ms.
return (($ms < 1000) ? ($ms * 1.1) : ($ms));
}
}
sub usleep {
# Sleep this many milliseconds.
# Input:
# $ms = milliseconds to sleep
my $ms = shift;
::debug(int($ms),"ms ");
select(undef, undef, undef, $ms/1000);
}
sub now {
# Returns time since epoch as in seconds with 3 decimals
# Uses:
# @Global::use
# Returns:
# $time = time now with millisecond accuracy
if(not $Global::use{"Time::HiRes"}) {
if(eval "use Time::HiRes qw ( time );") {
eval "sub TimeHiRestime { return Time::HiRes::time };";
} else {
eval "sub TimeHiRestime { return time() };";
}
$Global::use{"Time::HiRes"} = 1;
}
return (int(TimeHiRestime()*1000))/1000;
}
sub multiply_binary_prefix {
# Evalualte numbers with binary prefix
# Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
# ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
# K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
# k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
# 13G = 13*1024*1024*1024 = 13958643712
# Input:
# $s = string with prefixes
# Returns:
# $value = int with prefixes multiplied
my $s = shift;
$s =~ s/ki/*1024/gi;
$s =~ s/mi/*1024*1024/gi;
$s =~ s/gi/*1024*1024*1024/gi;
$s =~ s/ti/*1024*1024*1024*1024/gi;
$s =~ s/pi/*1024*1024*1024*1024*1024/gi;
$s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi;
$s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
$s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
$s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
$s =~ s/K/*1024/g;
$s =~ s/M/*1024*1024/g;
$s =~ s/G/*1024*1024*1024/g;
$s =~ s/T/*1024*1024*1024*1024/g;
$s =~ s/P/*1024*1024*1024*1024*1024/g;
$s =~ s/E/*1024*1024*1024*1024*1024*1024/g;
$s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
$s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
$s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
$s =~ s/k/*1000/g;
$s =~ s/m/*1000*1000/g;
$s =~ s/g/*1000*1000*1000/g;
$s =~ s/t/*1000*1000*1000*1000/g;
$s =~ s/p/*1000*1000*1000*1000*1000/g;
$s =~ s/e/*1000*1000*1000*1000*1000*1000/g;
$s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g;
$s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
$s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
$s = eval $s;
::debug($s);
return $s;
}
sub tmpfile {
# Create tempfile as $TMPDIR/parXXXXX
# Returns:
# $filename = file name created
return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
}
sub __DEBUGGING__ {}
sub debug {
# Uses:
# $Global::debug
# %Global::fd
# Returns: N/A
$Global::debug or return;
@_ = grep { defined $_ ? $_ : "" } @_;
if($Global::debug eq "all" or $Global::debug eq $_[0]) {
if($Global::fd{1}) {
# Original stdout was saved
my $stdout = $Global::fd{1};
print $stdout @_[1..$#_];
} else {
print @_[1..$#_];
}
}
}
sub my_memory_usage {
# Returns:
# memory usage if found
# 0 otherwise
use strict;
use FileHandle;
my $pid = $$;
if(-e "/proc/$pid/stat") {
my $fh = FileHandle->new("</proc/$pid/stat");
my $data = <$fh>;
chomp $data;
$fh->close;
my @procinfo = split(/\s+/,$data);
return undef_as_zero($procinfo[22]);
} else {
return 0;
}
}
sub my_size {
# Returns:
# $size = size of object if Devel::Size is installed
# -1 otherwise
my @size_this = (@_);
eval "use Devel::Size qw(size total_size)";
if ($@) {
return -1;
} else {
return total_size(@_);
}
}
sub my_dump {
# Returns:
# ascii expression of object if Data::Dump(er) is installed
# error code otherwise
my @dump_this = (@_);
eval "use Data::Dump qw(dump);";
if ($@) {
# Data::Dump not installed
eval "use Data::Dumper;";
if ($@) {
my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
"Not dumping output\n";
print $Global::original_stderr $err;
return $err;
} else {
return Dumper(@dump_this);
}
} else {
# Create a dummy Data::Dump:dump as Hans Schou sometimes has
# it undefined
eval "sub Data::Dump:dump {}";
eval "use Data::Dump qw(dump);";
return (Data::Dump::dump(@dump_this));
}
}
sub my_croak {
eval "use Carp; 1";
$Carp::Verbose = 1;
croak(@_);
}
sub my_carp {
eval "use Carp; 1";
$Carp::Verbose = 1;
carp(@_);
}
sub __OBJECT_ORIENTED_PARTS__ {}
package SSHLogin;
sub new {
my $class = shift;
my $sshlogin_string = shift;
my $ncpus;
my %hostgroups;
# SSHLogins can have these formats:
# @grp+grp/ncpu//usr/bin/ssh user@server
# ncpu//usr/bin/ssh user@server
# /usr/bin/ssh user@server
# user@server
# ncpu/user@server
# @grp+grp/user@server
if($sshlogin_string =~ s:^\@([^/]+)/?::) {
# Look for SSHLogin hostgroups
%hostgroups = map { $_ => 1 } split(/\+/, $1);
}
if ($sshlogin_string =~ s:^(\d+)/::) {
# Override default autodetected ncpus unless missing
$ncpus = $1;
}
my $string = $sshlogin_string;
# An SSHLogin is always in the hostgroup of its $string-name
$hostgroups{$string} = 1;
@Global::hostgroups{keys %hostgroups} = values %hostgroups;
my @unget = ();
my $no_slash_string = $string;
$no_slash_string =~ s/[^-a-z0-9:]/_/gi;
return bless {
'string' => $string,
'jobs_running' => 0,
'jobs_completed' => 0,
'maxlength' => undef,
'max_jobs_running' => undef,
'orig_max_jobs_running' => undef,
'ncpus' => $ncpus,
'hostgroups' => \%hostgroups,
'sshcommand' => undef,
'serverlogin' => undef,
'control_path_dir' => undef,
'control_path' => undef,
'time_to_login' => undef,
'last_login_at' => undef,
'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" .
$no_slash_string,
'loadavg' => undef,
'last_loadavg_update' => 0,
'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" .
$no_slash_string,
'swap_activity' => undef,
}, ref($class) || $class;
}
sub DESTROY {
my $self = shift;
# Remove temporary files if they are created.
unlink $self->{'loadavg_file'};
unlink $self->{'swap_activity_file'};
}
sub string {
my $self = shift;
return $self->{'string'};
}
sub jobs_running {
my $self = shift;
return ($self->{'jobs_running'} || "0");
}
sub inc_jobs_running {
my $self = shift;
$self->{'jobs_running'}++;
}
sub dec_jobs_running {
my $self = shift;
$self->{'jobs_running'}--;
}
sub set_maxlength {
my $self = shift;
$self->{'maxlength'} = shift;
}
sub maxlength {
my $self = shift;
return $self->{'maxlength'};
}
sub jobs_completed {
my $self = shift;
return $self->{'jobs_completed'};
}
sub in_hostgroups {
# Input:
# @hostgroups = the hostgroups to look for
# Returns:
# true if intersection of @hostgroups and the hostgroups of this
# SSHLogin is non-empty
my $self = shift;
return grep { defined $self->{'hostgroups'}{$_} } @_;
}
sub hostgroups {
my $self = shift;
return keys %{$self->{'hostgroups'}};
}
sub inc_jobs_completed {
my $self = shift;
$self->{'jobs_completed'}++;
}
sub set_max_jobs_running {
my $self = shift;
if(defined $self->{'max_jobs_running'}) {
$Global::max_jobs_running -= $self->{'max_jobs_running'};
}
$self->{'max_jobs_running'} = shift;
if(defined $self->{'max_jobs_running'}) {
# max_jobs_running could be resat if -j is a changed file
$Global::max_jobs_running += $self->{'max_jobs_running'};
}
# Initialize orig to the first non-zero value that comes around
$self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
}
sub swapping {
my $self = shift;
my $swapping = $self->swap_activity();
return (not defined $swapping or $swapping)
}
sub swap_activity {
# If the currently known swap activity is too old:
# Recompute a new one in the background
# Returns:
# last swap activity computed
my $self = shift;
# Should we update the swap_activity file?
my $update_swap_activity_file = 0;
if(-r $self->{'swap_activity_file'}) {
open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r");
my $swap_out = <$swap_fh>;
close $swap_fh;
if($swap_out =~ /^(\d+)$/) {
$self->{'swap_activity'} = $1;
::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
}
::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
if(time - $self->{'last_swap_activity_update'} > 10) {
# last swap activity update was started 10 seconds ago
::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
$update_swap_activity_file = 1;
}
} else {
::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
$self->{'swap_activity'} = undef;
$update_swap_activity_file = 1;
}
if($update_swap_activity_file) {
::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
$self->{'last_swap_activity_update'} = time;
-e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
-e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
my $swap_activity;
$swap_activity = swapactivityscript();
if($self->{'string'} ne ":") {
$swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
::shell_quote_scalar($swap_activity);
}
# Run swap_activity measuring.
# As the command can take long to run if run remote
# save it to a tmp file before moving it to the correct file
my $file = $self->{'swap_activity_file'};
my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
::debug("swap", "\n", $swap_activity, "\n");
qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & };
}
return $self->{'swap_activity'};
}
{
my $script;
sub swapactivityscript {
# Returns:
# shellscript for detecting swap activity
#
# arguments for vmstat are OS dependant
# swap_in and swap_out are in different columns depending on OS
#
if(not $script) {
my %vmstat = (
# linux: $7*$8
# $ vmstat 1 2
# procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
# r b swpd free buff cache si so bi bo in cs us sy id wa
# 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
# 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
# solaris: $6*$7
# $ vmstat -S 1 2
# kthr memory page disk faults cpu
# r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
# 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
# 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
# darwin (macosx): $21*$22
# $ vm_stat -c 2 1
# Mach Virtual Memory Statistics: (page size of 4096 bytes)
# free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
# 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
# 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
# ultrix: $12*$13
# $ vmstat -S 1 2
# procs faults cpu memory page disk
# r b w in sy cs us sy id avm fre si so pi po fr de sr s0
# 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
# 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
# aix: $6*$7
# $ vmstat 1 2
# System configuration: lcpu=1 mem=2048MB
#
# kthr memory page faults cpu
# ----- ----------- ------------------------ ------------ -----------
# r b avm fre re pi po fr sr cy in sy cs us sy id wa
# 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
# 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
# freebsd: $8*$9
# $ vmstat -H 1 2
# procs memory page disks faults cpu
# r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
# 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
# 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
# mirbsd: $8*$9
# $ vmstat 1 2
# procs memory page disks traps cpu
# r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
# 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
# 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
# netbsd: $7*$8
# $ vmstat 1 2
# procs memory page disks faults cpu
# r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
# 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
# 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
# openbsd: $8*$9
# $ vmstat 1 2
# procs memory page disks traps cpu
# r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
# 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
# 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
# hpux: $8*$9
# $ vmstat 1 2
# procs memory page faults cpu
# r b w avm free re at pi po fr de sr in sy cs us sy id
# 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
# 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
# dec_osf (tru64): $11*$12
# $ vmstat 1 2
# Virtual Memory Statistics: (pagesize = 8192)
# procs memory pages intr cpu
# r w u act free wire fault cow zero react pin pout in sy cs us sy id
# 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
# 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
# gnu (hurd): $7*$8
# $ vmstat -k 1 2
# (pagesize: 4, size: 512288, swap size: 894972)
# free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
# 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
# 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
# -nto (qnx has no swap)
#-irix
#-svr5 (scosysv)
);
my $perlscript = "";
for my $os (keys %vmstat) {
#q[ { vmstat 1 2 2> /dev/null || vmstat -c 1 2; } | ].
# q[ awk 'NR!=4{next} NF==17||NF==16{print $7*$8} NF==22{print $21*$22} {exit}' ];
$vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
$perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
$vmstat{$os}[1] . '}"` }';
}
$perlscript = "perl -e " . ::shell_quote_scalar($perlscript);
$script = $Global::envvar. " " .$perlscript;
}
return $script;
}
}
sub too_fast_remote_login {
my $self = shift;
if($self->{'last_login_at'} and $self->{'time_to_login'}) {
# sshd normally allows 10 simultaneous logins
# A login takes time_to_login
# So time_to_login/5 should be safe
# If now <= last_login + time_to_login/5: Then it is too soon.
my $too_fast = (::now() <= $self->{'last_login_at'}
+ $self->{'time_to_login'}/5);
::debug("run", "Too fast? $too_fast ");
return $too_fast;
} else {
# No logins so far (or time_to_login not computed): it is not too fast
return 0;
}
}
sub last_login_at {
my $self = shift;
return $self->{'last_login_at'};
}
sub set_last_login_at {
my $self = shift;
$self->{'last_login_at'} = shift;
}
sub loadavg_too_high {
my $self = shift;
my $loadavg = $self->loadavg();
return (not defined $loadavg or
$loadavg > $self->max_loadavg());
}
sub loadavg {
# If the currently know loadavg is too old:
# Recompute a new one in the background
# The load average is computed as the number of processes waiting for disk
# or CPU right now. So it is the server load this instant and not averaged over
# several minutes. This is needed so GNU Parallel will at most start one job
# that will push the load over the limit.
#
# Returns:
# $last_loadavg = last load average computed (undef if none)
my $self = shift;
# Should we update the loadavg file?
my $update_loadavg_file = 0;
if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
local $/ = undef;
my $load_out = <$load_fh>;
close $load_fh;
my $load =()= ($load_out=~/(^[DR]....[^\[])/gm);
if($load > 0) {
# load is overestimated by 1
$self->{'loadavg'} = $load - 1;
::debug("load", "New loadavg: ", $self->{'loadavg'});
} else {
::die_bug("loadavg_invalid_content: $load_out");
}
::debug("load", "Last update: ", $self->{'last_loadavg_update'});
if(time - $self->{'last_loadavg_update'} > 10) {
# last loadavg was started 10 seconds ago
::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ",
$self->{'loadavg_file'});
$update_loadavg_file = 1;
}
} else {
::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
$self->{'loadavg'} = undef;
$update_loadavg_file = 1;
}
if($update_loadavg_file) {
::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
$self->{'last_loadavg_update'} = time;
-e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
-e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
my $cmd = "";
if($self->{'string'} ne ":") {
$cmd = $self->sshcommand() . " " . $self->serverlogin() . " ";
}
# TODO Is is called 'ps ax -o state,command' on other platforms?
$cmd .= "ps ax -o state,command";
# As the command can take long to run if run remote
# save it to a tmp file before moving it to the correct file
my $file = $self->{'loadavg_file'};
my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa");
qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & };
}
return $self->{'loadavg'};
}
sub max_loadavg {
my $self = shift;
# If --load is a file it might be changed
if($Global::max_load_file) {
my $mtime = (stat($Global::max_load_file))[9];
if($mtime > $Global::max_load_file_last_mod) {
$Global::max_load_file_last_mod = $mtime;
for my $sshlogin (values %Global::host) {
$sshlogin->set_max_loadavg(undef);
}
}
}
if(not defined $self->{'max_loadavg'}) {
$self->{'max_loadavg'} =
$self->compute_max_loadavg($opt::load);
}
::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
return $self->{'max_loadavg'};
}
sub set_max_loadavg {
my $self = shift;
$self->{'max_loadavg'} = shift;
}
sub compute_max_loadavg {
# Parse the max loadaverage that the user asked for using --load
# Returns:
# max loadaverage
my $self = shift;
my $loadspec = shift;
my $load;
if(defined $loadspec) {
if($loadspec =~ /^\+(\d+)$/) {
# E.g. --load +2
my $j = $1;
$load =
$self->ncpus() + $j;
} elsif ($loadspec =~ /^-(\d+)$/) {
# E.g. --load -2
my $j = $1;
$load =
$self->ncpus() - $j;
} elsif ($loadspec =~ /^(\d+)\%$/) {
my $j = $1;
$load =
$self->ncpus() * $j / 100;
} elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
$load = $1;
} elsif (-f $loadspec) {
$Global::max_load_file = $loadspec;
$Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
if(open(my $in_fh, "<", $Global::max_load_file)) {
my $opt_load_file = join("",<$in_fh>);
close $in_fh;
$load = $self->compute_max_loadavg($opt_load_file);
} else {
print $Global::original_stderr "Cannot open $loadspec\n";
::wait_and_exit(255);
}
} else {
print $Global::original_stderr "Parsing of --load failed\n";
::die_usage();
}
if($load < 0.01) {
$load = 0.01;
}
}
return $load;
}
sub time_to_login {
my $self = shift;
return $self->{'time_to_login'};
}
sub set_time_to_login {
my $self = shift;
$self->{'time_to_login'} = shift;
}
sub max_jobs_running {
my $self = shift;
if(not defined $self->{'max_jobs_running'}) {
my $nproc = $self->compute_number_of_processes($opt::jobs);
$self->set_max_jobs_running($nproc);
}
return $self->{'max_jobs_running'};
}
sub orig_max_jobs_running {
my $self = shift;
return $self->{'orig_max_jobs_running'};
}
sub compute_number_of_processes {
# Number of processes wanted and limited by system resources
# Returns:
# Number of processes
my $self = shift;
my $opt_P = shift;
my $wanted_processes = $self->user_requested_processes($opt_P);
if(not defined $wanted_processes) {
$wanted_processes = $Global::default_simultaneous_sshlogins;
}
::debug("load", "Wanted procs: $wanted_processes\n");
my $system_limit =
$self->processes_available_by_system_limit($wanted_processes);
::debug("load", "Limited to procs: $system_limit\n");
return $system_limit;
}
sub processes_available_by_system_limit {
# If the wanted number of processes is bigger than the system limits:
# Limit them to the system limits
# Limits are: File handles, number of input lines, processes,
# and taking > 1 second to spawn 10 extra processes
# Returns:
# Number of processes
my $self = shift;
my $wanted_processes = shift;
my $system_limit = 0;
my @jobs = ();
my $job;
my @args = ();
my $arg;
my $more_filehandles = 1;
my $max_system_proc_reached = 0;
my $slow_spawining_warning_printed = 0;
my $time = time;
my %fh;
my @children;
# Reserve filehandles
# perl uses 7 filehandles for something?
# parallel uses 1 for memory_usage
# parallel uses 4 for ?
for my $i (1..12) {
open($fh{"init-$i"}, "<", "/dev/null");
}
for(1..2) {
# System process limit
my $child;
if($child = fork()) {
push (@children,$child);
$Global::unkilled_children{$child} = 1;
} elsif(defined $child) {
# The child takes one process slot
# It will be killed later
$SIG{TERM} = $Global::original_sig{TERM};
sleep 10000000;
exit(0);
} else {
$max_system_proc_reached = 1;
}
}
my $count_jobs_already_read = $Global::JobQueue->next_seq();
my $wait_time_for_getting_args = 0;
my $start_time = time;
while(1) {
$system_limit >= $wanted_processes and last;
not $more_filehandles and last;
$max_system_proc_reached and last;
my $before_getting_arg = time;
if($Global::semaphore or $opt::pipe) {
# Skip: No need to get args
} elsif(defined $opt::retries and $count_jobs_already_read) {
# For retries we may need to run all jobs on this sshlogin
# so include the already read jobs for this sshlogin
$count_jobs_already_read--;
} else {
if($opt::X or $opt::m) {
# The arguments may have to be re-spread over several jobslots
# So pessimistically only read one arg per jobslot
# instead of a full commandline
if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
if($Global::JobQueue->empty()) {
last;
} else {
($job) = $Global::JobQueue->get();
push(@jobs, $job);
}
} else {
($arg) = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
push(@args, $arg);
}
} else {
# If there are no more command lines, then we have a process
# per command line, so no need to go further
$Global::JobQueue->empty() and last;
($job) = $Global::JobQueue->get();
push(@jobs, $job);
}
}
$wait_time_for_getting_args += time - $before_getting_arg;
$system_limit++;
# Every simultaneous process uses 2 filehandles when grouping
# Every simultaneous process uses 2 filehandles when compressing
$more_filehandles = open($fh{$system_limit*10}, "<", "/dev/null")
&& open($fh{$system_limit*10+2}, "<", "/dev/null")
&& open($fh{$system_limit*10+3}, "<", "/dev/null")
&& open($fh{$system_limit*10+4}, "<", "/dev/null");
# System process limit
my $child;
if($child = fork()) {
push (@children,$child);
$Global::unkilled_children{$child} = 1;
} elsif(defined $child) {
# The child takes one process slot
# It will be killed later
$SIG{TERM} = $Global::original_sig{TERM};
sleep 10000000;
exit(0);
} else {
$max_system_proc_reached = 1;
}
my $forktime = time - $time - $wait_time_for_getting_args;
::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
$forktime,
" (processes so far: ", $system_limit,")\n");
if($system_limit > 10 and
$forktime > 1 and
$forktime > $system_limit * 0.01
and not $slow_spawining_warning_printed) {
# It took more than 0.01 second to fork a processes on avg.
# Give the user a warning. He can press Ctrl-C if this
# sucks.
print $Global::original_stderr
("parallel: Warning: Starting $system_limit processes took > $forktime sec.\n",
"Consider adjusting -j. Press CTRL-C to stop.\n");
$slow_spawining_warning_printed = 1;
}
}
# Cleanup: Close the files
for (values %fh) { close $_ }
# Cleanup: Kill the children
for my $pid (@children) {
kill 9, $pid;
waitpid($pid,0);
delete $Global::unkilled_children{$pid};
}
# Cleanup: Unget the command_lines or the @args
$Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
$Global::JobQueue->unget(@jobs);
if($system_limit < $wanted_processes) {
# The system_limit is less than the wanted_processes
if($system_limit < 1 and not $Global::JobQueue->empty()) {
::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf\n",
"or /proc/sys/kernel/pid_max may help.\n");
::wait_and_exit(255);
}
if(not $more_filehandles) {
::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n",
"Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ",
"raising ulimit -n or /etc/security/limits.conf may help.\n");
}
if($max_system_proc_reached) {
::warning("Only enough available processes to run ", $system_limit,
" jobs in parallel. Raising ulimit -u or /etc/security/limits.conf\n",
"or /proc/sys/kernel/pid_max may help.\n");
}
}
if($] == 5.008008 and $system_limit > 1000) {
# https://savannah.gnu.org/bugs/?36942
$system_limit = 1000;
}
if($Global::JobQueue->empty()) {
$system_limit ||= 1;
}
if($self->string() ne ":" and
$system_limit > $Global::default_simultaneous_sshlogins) {
$system_limit =
$self->simultaneous_sshlogin_limit($system_limit);
}
return $system_limit;
}
sub simultaneous_sshlogin_limit {
# Test by logging in wanted number of times simultaneously
# Returns:
# min($wanted_processes,$working_simultaneous_ssh_logins-1)
my $self = shift;
my $wanted_processes = shift;
if($self->{'time_to_login'}) {
return $wanted_processes;
}
# Try twice because it guesses wrong sometimes
# Choose the minimal
my $ssh_limit =
::min($self->simultaneous_sshlogin($wanted_processes),
$self->simultaneous_sshlogin($wanted_processes));
if($ssh_limit < $wanted_processes) {
my $serverlogin = $self->serverlogin();
::warning("ssh to $serverlogin only allows ",
"for $ssh_limit simultaneous logins.\n",
"You may raise this by changing ",
"/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n",
"Using only ",$ssh_limit-1," connections ",
"to avoid race conditions.\n");
}
# Race condition can cause problem if using all sshs.
if($ssh_limit > 1) { $ssh_limit -= 1; }
return $ssh_limit;
}
sub simultaneous_sshlogin {
# Using $sshlogin try to see if we can do $wanted_processes
# simultaneous logins
# (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l
# Returns:
# Number of succesful logins
my $self = shift;
my $wanted_processes = shift;
my $sshcmd = $self->sshcommand();
my $serverlogin = $self->serverlogin();
my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin </dev/null 2>&1 &"x$wanted_processes;
::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
::die_bug("simultaneouslogin");
my $ssh_limit = <$simul_fh>;
close $simul_fh;
chomp $ssh_limit;
return $ssh_limit;
}
sub set_ncpus {
my $self = shift;
$self->{'ncpus'} = shift;
}
sub user_requested_processes {
# Parse the number of processes that the user asked for using -j
# Returns:
# the number of processes to run on this sshlogin
my $self = shift;
my $opt_P = shift;
my $processes;
if(defined $opt_P) {
if($opt_P =~ /^\+(\d+)$/) {
# E.g. -P +2
my $j = $1;
$processes =
$self->ncpus() + $j;
} elsif ($opt_P =~ /^-(\d+)$/) {
# E.g. -P -2
my $j = $1;
$processes =
$self->ncpus() - $j;
} elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
# E.g. -P 10.5%
my $j = $1;
$processes =
$self->ncpus() * $j / 100;
} elsif ($opt_P =~ /^(\d+)$/) {
$processes = $1;
if($processes == 0) {
# -P 0 = infinity (or at least close)
$processes = $Global::infinity;
}
} elsif (-f $opt_P) {
$Global::max_procs_file = $opt_P;
$Global::max_procs_file_last_mod = (stat($Global::max_procs_file))[9];
if(open(my $in_fh, "<", $Global::max_procs_file)) {
my $opt_P_file = join("",<$in_fh>);
close $in_fh;
$processes = $self->user_requested_processes($opt_P_file);
} else {
::error("Cannot open $opt_P.\n");
::wait_and_exit(255);
}
} else {
::error("Parsing of --jobs/-j/--max-procs/-P failed.\n");
::die_usage();
}
$processes = ::ceil($processes);
}
return $processes;
}
sub ncpus {
my $self = shift;
if(not defined $self->{'ncpus'}) {
my $sshcmd = $self->sshcommand();
my $serverlogin = $self->serverlogin();
if($serverlogin eq ":") {
if($opt::use_cpus_instead_of_cores) {
$self->{'ncpus'} = no_of_cpus();
} else {
$self->{'ncpus'} = no_of_cores();
}
} else {
my $ncpu;
my $sqe = ::shell_quote_scalar($Global::envvar);
if($opt::use_cpus_instead_of_cores) {
$ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus);
} else {
::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n));
$ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores);
}
chomp $ncpu;
if($ncpu =~ /^\s*[0-9]+\s*$/s) {
$self->{'ncpus'} = $ncpu;
} else {
::warning("Could not figure out ",
"number of cpus on $serverlogin ($ncpu). Using 1.\n");
$self->{'ncpus'} = 1;
}
}
}
return $self->{'ncpus'};
}
sub no_of_cpus {
# Returns:
# Number of physical CPUs
local $/="\n"; # If delimiter is set, then $/ will be wrong
my $no_of_cpus;
if ($^O eq 'linux') {
$no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux();
} elsif ($^O eq 'freebsd') {
$no_of_cpus = no_of_cpus_freebsd();
} elsif ($^O eq 'netbsd') {
$no_of_cpus = no_of_cpus_netbsd();
} elsif ($^O eq 'openbsd') {
$no_of_cpus = no_of_cpus_openbsd();
} elsif ($^O eq 'gnu') {
$no_of_cpus = no_of_cpus_hurd();
} elsif ($^O eq 'darwin') {
$no_of_cpus = no_of_cpus_darwin();
} elsif ($^O eq 'solaris') {
$no_of_cpus = no_of_cpus_solaris();
} elsif ($^O eq 'aix') {
$no_of_cpus = no_of_cpus_aix();
} elsif ($^O eq 'hpux') {
$no_of_cpus = no_of_cpus_hpux();
} elsif ($^O eq 'nto') {
$no_of_cpus = no_of_cpus_qnx();
} elsif ($^O eq 'svr5') {
$no_of_cpus = no_of_cpus_openserver();
} elsif ($^O eq 'irix') {
$no_of_cpus = no_of_cpus_irix();
} elsif ($^O eq 'dec_osf') {
$no_of_cpus = no_of_cpus_tru64();
} else {
$no_of_cpus = (no_of_cpus_gnu_linux()
|| no_of_cpus_freebsd()
|| no_of_cpus_netbsd()
|| no_of_cpus_openbsd()
|| no_of_cpus_hurd()
|| no_of_cpus_darwin()
|| no_of_cpus_solaris()
|| no_of_cpus_aix()
|| no_of_cpus_hpux()
|| no_of_cpus_qnx()
|| no_of_cpus_openserver()
|| no_of_cpus_irix()
|| no_of_cpus_tru64()
# Number of cores is better than no guess for #CPUs
|| nproc()
);
}
if($no_of_cpus) {
chomp $no_of_cpus;
return $no_of_cpus;
} else {
::warning("Cannot figure out number of cpus. Using 1.\n");
return 1;
}
}
sub no_of_cores {
# Returns:
# Number of CPU cores
local $/="\n"; # If delimiter is set, then $/ will be wrong
my $no_of_cores;
if ($^O eq 'linux') {
$no_of_cores = no_of_cores_gnu_linux();
} elsif ($^O eq 'freebsd') {
$no_of_cores = no_of_cores_freebsd();
} elsif ($^O eq 'netbsd') {
$no_of_cores = no_of_cores_netbsd();
} elsif ($^O eq 'openbsd') {
$no_of_cores = no_of_cores_openbsd();
} elsif ($^O eq 'gnu') {
$no_of_cores = no_of_cores_hurd();
} elsif ($^O eq 'darwin') {
$no_of_cores = no_of_cores_darwin();
} elsif ($^O eq 'solaris') {
$no_of_cores = no_of_cores_solaris();
} elsif ($^O eq 'aix') {
$no_of_cores = no_of_cores_aix();
} elsif ($^O eq 'hpux') {
$no_of_cores = no_of_cores_hpux();
} elsif ($^O eq 'nto') {
$no_of_cores = no_of_cores_qnx();
} elsif ($^O eq 'svr5') {
$no_of_cores = no_of_cores_openserver();
} elsif ($^O eq 'irix') {
$no_of_cores = no_of_cores_irix();
} elsif ($^O eq 'dec_osf') {
$no_of_cores = no_of_cores_tru64();
} else {
$no_of_cores = (no_of_cores_gnu_linux()
|| no_of_cores_freebsd()
|| no_of_cores_netbsd()
|| no_of_cores_openbsd()
|| no_of_cores_hurd()
|| no_of_cores_darwin()
|| no_of_cores_solaris()
|| no_of_cores_aix()
|| no_of_cores_hpux()
|| no_of_cores_qnx()
|| no_of_cores_openserver()
|| no_of_cores_irix()
|| no_of_cores_tru64()
|| nproc()
);
}
if($no_of_cores) {
chomp $no_of_cores;
return $no_of_cores;
} else {
::warning("Cannot figure out number of CPU cores. Using 1.\n");
return 1;
}
}
sub nproc {
# Returns:
# Number of cores using `nproc`
my $no_of_cores = `nproc 2>/dev/null`;
return $no_of_cores;
}
sub no_of_cpus_gnu_linux {
# Returns:
# Number of physical CPUs on GNU/Linux
# undef if not GNU/Linux
my $no_of_cpus;
my $no_of_cores;
if(-e "/proc/cpuinfo") {
$no_of_cpus = 0;
$no_of_cores = 0;
my %seen;
open(my $in_fh, "<", "/proc/cpuinfo") || return undef;
while(<$in_fh>) {
if(/^physical id.*[:](.*)/ and not $seen{$1}++) {
$no_of_cpus++;
}
/^processor.*[:]/i and $no_of_cores++;
}
close $in_fh;
}
return ($no_of_cpus||$no_of_cores);
}
sub no_of_cores_gnu_linux {
# Returns:
# Number of CPU cores on GNU/Linux
# undef if not GNU/Linux
my $no_of_cores;
if(-e "/proc/cpuinfo") {
$no_of_cores = 0;
open(my $in_fh, "<", "/proc/cpuinfo") || return undef;
while(<$in_fh>) {
/^processor.*[:]/i and $no_of_cores++;
}
close $in_fh;
}
return $no_of_cores;
}
sub no_of_cpus_freebsd {
# Returns:
# Number of physical CPUs on FreeBSD
# undef if not FreeBSD
my $no_of_cpus =
(`sysctl -a dev.cpu 2>/dev/null | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }'`
or
`sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`);
chomp $no_of_cpus;
return $no_of_cpus;
}
sub no_of_cores_freebsd {
# Returns:
# Number of CPU cores on FreeBSD
# undef if not FreeBSD
my $no_of_cores =
(`sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`
or
`sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`);
chomp $no_of_cores;
return $no_of_cores;
}
sub no_of_cpus_netbsd {
# Returns:
# Number of physical CPUs on NetBSD
# undef if not NetBSD
my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`;
chomp $no_of_cpus;
return $no_of_cpus;
}
sub no_of_cores_netbsd {
# Returns:
# Number of CPU cores on NetBSD
# undef if not NetBSD
my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`;
chomp $no_of_cores;
return $no_of_cores;
}
sub no_of_cpus_openbsd {
# Returns:
# Number of physical CPUs on OpenBSD
# undef if not OpenBSD
my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`;
chomp $no_of_cpus;
return $no_of_cpus;
}
sub no_of_cores_openbsd {
# Returns:
# Number of CPU cores on OpenBSD
# undef if not OpenBSD
my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`;
chomp $no_of_cores;
return $no_of_cores;
}
sub no_of_cpus_hurd {
# Returns:
# Number of physical CPUs on HURD
# undef if not HURD
my $no_of_cpus = `nproc`;
chomp $no_of_cpus;
return $no_of_cpus;
}
sub no_of_cores_hurd {
# Returns:
# Number of physical CPUs on HURD
# undef if not HURD
my $no_of_cores = `nproc`;
chomp $no_of_cores;
return $no_of_cores;
}
sub no_of_cpus_darwin {
# Returns:
# Number of physical CPUs on Mac Darwin
# undef if not Mac Darwin
my $no_of_cpus =
(`sysctl -n hw.physicalcpu 2>/dev/null`
or
`sysctl -a hw 2>/dev/null | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }'`);
return $no_of_cpus;
}
sub no_of_cores_darwin {
# Returns:
# Number of CPU cores on Mac Darwin
# undef if not Mac Darwin
my $no_of_cores =
(`sysctl -n hw.logicalcpu 2>/dev/null`
or
`sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`);
return $no_of_cores;
}
sub no_of_cpus_solaris {
# Returns:
# Number of physical CPUs on Solaris
# undef if not Solaris
if(-x "/usr/sbin/psrinfo") {
my @psrinfo = `/usr/sbin/psrinfo`;
if($#psrinfo >= 0) {
return $#psrinfo +1;
}
}
if(-x "/usr/sbin/prtconf") {
my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`;
if($#prtconf >= 0) {
return $#prtconf +1;
}
}
return undef;
}
sub no_of_cores_solaris {
# Returns:
# Number of CPU cores on Solaris
# undef if not Solaris
if(-x "/usr/sbin/psrinfo") {
my @psrinfo = `/usr/sbin/psrinfo`;
if($#psrinfo >= 0) {
return $#psrinfo +1;
}
}
if(-x "/usr/sbin/prtconf") {
my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`;
if($#prtconf >= 0) {
return $#prtconf +1;
}
}
return undef;
}
sub no_of_cpus_aix {
# Returns:
# Number of physical CPUs on AIX
# undef if not AIX
my $no_of_cpus = 0;
if(-x "/usr/sbin/lscfg") {
open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")
|| return undef;
$no_of_cpus = <$in_fh>;
chomp ($no_of_cpus);
close $in_fh;
}
return $no_of_cpus;
}
sub no_of_cores_aix {
# Returns:
# Number of CPU cores on AIX
# undef if not AIX
my $no_of_cores;
if(-x "/usr/bin/vmstat") {
open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef;
while(<$in_fh>) {
/lcpu=([0-9]*) / and $no_of_cores = $1;
}
close $in_fh;
}
return $no_of_cores;
}
sub no_of_cpus_hpux {
# Returns:
# Number of physical CPUs on HP-UX
# undef if not HP-UX
my $no_of_cpus =
(`/usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'`);
return $no_of_cpus;
}
sub no_of_cores_hpux {
# Returns:
# Number of CPU cores on HP-UX
# undef if not HP-UX
my $no_of_cores =
(`/usr/bin/mpsched -s 2>&1 | grep 'Processor Count' | awk '{ print \$3 }'`);
return $no_of_cores;
}
sub no_of_cpus_qnx {
# Returns:
# Number of physical CPUs on QNX
# undef if not QNX
# BUG: It is now known how to calculate this.
my $no_of_cpus = 0;
return $no_of_cpus;
}
sub no_of_cores_qnx {
# Returns:
# Number of CPU cores on QNX
# undef if not QNX
# BUG: It is now known how to calculate this.
my $no_of_cores = 0;
return $no_of_cores;
}
sub no_of_cpus_openserver {
# Returns:
# Number of physical CPUs on SCO OpenServer
# undef if not SCO OpenServer
my $no_of_cpus = 0;
if(-x "/usr/sbin/psrinfo") {
my @psrinfo = `/usr/sbin/psrinfo`;
if($#psrinfo >= 0) {
return $#psrinfo +1;
}
}
return $no_of_cpus;
}
sub no_of_cores_openserver {
# Returns:
# Number of CPU cores on SCO OpenServer
# undef if not SCO OpenServer
my $no_of_cores = 0;
if(-x "/usr/sbin/psrinfo") {
my @psrinfo = `/usr/sbin/psrinfo`;
if($#psrinfo >= 0) {
return $#psrinfo +1;
}
}
return $no_of_cores;
}
sub no_of_cpus_irix {
# Returns:
# Number of physical CPUs on IRIX
# undef if not IRIX
my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`;
return $no_of_cpus;
}
sub no_of_cores_irix {
# Returns:
# Number of CPU cores on IRIX
# undef if not IRIX
my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`;
return $no_of_cores;
}
sub no_of_cpus_tru64 {
# Returns:
# Number of physical CPUs on Tru64
# undef if not Tru64
my $no_of_cpus = `sizer -pr`;
return $no_of_cpus;
}
sub no_of_cores_tru64 {
# Returns:
# Number of CPU cores on Tru64
# undef if not Tru64
my $no_of_cores = `sizer -pr`;
return $no_of_cores;
}
sub sshcommand {
my $self = shift;
if (not defined $self->{'sshcommand'}) {
$self->sshcommand_of_sshlogin();
}
return $self->{'sshcommand'};
}
sub serverlogin {
my $self = shift;
if (not defined $self->{'serverlogin'}) {
$self->sshcommand_of_sshlogin();
}
return $self->{'serverlogin'};
}
sub sshcommand_of_sshlogin {
# 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
# 'user@server' -> ('ssh','user@server')
# 'myssh user@server' -> ('myssh','user@server')
# 'myssh -l user server' -> ('myssh -l user','server')
# '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
# Returns:
# sshcommand - defaults to 'ssh'
# login@host
my $self = shift;
my ($sshcmd, $serverlogin);
if($self->{'string'} =~ /(.+) (\S+)$/) {
# Own ssh command
$sshcmd = $1; $serverlogin = $2;
} else {
# Normal ssh
if($opt::controlmaster) {
# Use control_path to make ssh faster
my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
$sshcmd = "ssh -S ".$control_path;
$serverlogin = $self->{'string'};
if(not $self->{'control_path'}{$control_path}++) {
# Master is not running for this control_path
# Start it
my $pid = fork();
if($pid) {
$Global::sshmaster{$pid} ||= 1;
} else {
$SIG{'TERM'} = undef;
# Ignore the 'foo' being printed
open(STDOUT,">","/dev/null");
# OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
# STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument"
open(STDERR,">","/dev/null");
open(STDIN,"<","/dev/null");
# Run a sleep that outputs data, so it will discover if the ssh connection closes.
my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}');
my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep);
exec(@master);
}
}
} else {
$sshcmd = "ssh"; $serverlogin = $self->{'string'};
}
}
$self->{'sshcommand'} = $sshcmd;
$self->{'serverlogin'} = $serverlogin;
}
sub control_path_dir {
# Returns:
# path to directory
my $self = shift;
if(not defined $self->{'control_path_dir'}) {
-e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
-e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
$self->{'control_path_dir'} =
File::Temp::tempdir($ENV{'HOME'}
. "/.parallel/tmp/control_path_dir-XXXX",
CLEANUP => 1);
}
return $self->{'control_path_dir'};
}
sub rsync_transfer_cmd {
# Command to run to transfer a file
# Input:
# $file = filename of file to transfer
# $workdir = destination dir
# Returns:
# $cmd = rsync command to run to transfer $file ("" if unreadable)
my $self = shift;
my $file = shift;
my $workdir = shift;
if(not -r $file) {
::warning($file, " is not readable and will not be transferred.\n");
return "true";
}
my $rsync_destdir;
if($file =~ m:^/:) {
# rsync /foo/bar /
$rsync_destdir = "/";
} else {
$rsync_destdir = ::shell_quote_file($workdir);
}
$file = ::shell_quote_file($file);
my $sshcmd = $self->sshcommand();
my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd);
my $serverlogin = $self->serverlogin();
# Make dir if it does not exist
return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" .
rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )";
}
sub cleanup_cmd {
# Command to run to remove the remote file
# Input:
# $file = filename to remove
# $workdir = destination dir
# Returns:
# $cmd = ssh command to run to remove $file and empty parent dirs
my $self = shift;
my $file = shift;
my $workdir = shift;
my $f = $file;
if($f =~ m:/\./:) {
# foo/bar/./baz/quux => workdir/baz/quux
# /foo/bar/./baz/quux => workdir/baz/quux
$f =~ s:.*/\./:$workdir/:;
} elsif($f =~ m:^[^/]:) {
# foo/bar => workdir/foo/bar
$f = $workdir."/".$f;
}
my @subdirs = split m:/:, ::dirname($f);
my @rmdir;
my $dir = "";
for(@subdirs) {
$dir .= $_."/";
unshift @rmdir, ::shell_quote_file($dir);
}
my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : "";
if(defined $opt::workdir and $opt::workdir eq "...") {
$rmdir .= "rm -rf " . ::shell_quote_file($workdir).';';
}
$f = ::shell_quote_file($f);
my $sshcmd = $self->sshcommand();
my $serverlogin = $self->serverlogin();
return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)");
}
{
my $rsync;
sub rsync {
# rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
# If the version >= 3.1.0: downgrade to protocol 30
if(not $rsync) {
my @out = `rsync --version`;
for (@out) {
if(/version (\d+.\d+)(.\d+)?/) {
if($1 >= 3.1) {
# Version 3.1.0 or later: Downgrade to protocol 30
$rsync = "rsync --protocol 30";
} else {
$rsync = "rsync";
}
}
}
$rsync or ::die_bug("Cannot figure out version of rsync: @out");
}
return $rsync;
}
}
package JobQueue;
sub new {
my $class = shift;
my $commandref = shift;
my $read_from = shift;
my $context_replace = shift;
my $max_number_of_args = shift;
my $return_files = shift;
my $commandlinequeue = CommandLineQueue->new
($commandref, $read_from, $context_replace, $max_number_of_args,
$return_files);
my @unget = ();
return bless {
'unget' => \@unget,
'commandlinequeue' => $commandlinequeue,
'total_jobs' => undef,
}, ref($class) || $class;
}
sub get {
my $self = shift;
if(@{$self->{'unget'}}) {
my $job = shift @{$self->{'unget'}};
return ($job);
} else {
my $commandline = $self->{'commandlinequeue'}->get();
if(defined $commandline) {
my $job = Job->new($commandline);
return $job;
} else {
return undef;
}
}
}
sub unget {
my $self = shift;
unshift @{$self->{'unget'}}, @_;
}
sub empty {
my $self = shift;
my $empty = (not @{$self->{'unget'}})
&& $self->{'commandlinequeue'}->empty();
::debug("run", "JobQueue->empty $empty ");
return $empty;
}
sub total_jobs {
my $self = shift;
if(not defined $self->{'total_jobs'}) {
my $job;
my @queue;
my $start = time;
while($job = $self->get()) {
if(time - $start > 10) {
::warning("Reading all arguments takes longer than 10 seconds.\n");
$opt::eta && ::warning("Consider removing --eta.\n");
$opt::bar && ::warning("Consider removing --bar.\n");
last;
}
push @queue, $job;
}
while($job = $self->get()) {
push @queue, $job;
}
$self->unget(@queue);
$self->{'total_jobs'} = $#queue+1;
}
return $self->{'total_jobs'};
}
sub next_seq {
my $self = shift;
return $self->{'commandlinequeue'}->seq();
}
sub quote_args {
my $self = shift;
return $self->{'commandlinequeue'}->quote_args();
}
package Job;
sub new {
my $class = shift;
my $commandlineref = shift;
return bless {
'commandline' => $commandlineref, # CommandLine object
'workdir' => undef, # --workdir
'stdin' => undef, # filehandle for stdin (used for --pipe)
# filename for writing stdout to (used for --files)
'remaining' => "", # remaining data not sent to stdin (used for --pipe)
'datawritten' => 0, # amount of data sent via stdin (used for --pipe)
'transfersize' => 0, # size of files using --transfer
'returnsize' => 0, # size of files using --return
'pid' => undef,
# hash of { SSHLogins => number of times the command failed there }
'failed' => undef,
'sshlogin' => undef,
# The commandline wrapped with rsync and ssh
'sshlogin_wrap' => undef,
'exitstatus' => undef,
'exitsignal' => undef,
# Timestamp for timeout if any
'timeout' => undef,
'virgin' => 1,
}, ref($class) || $class;
}
sub replaced {
my $self = shift;
$self->{'commandline'} or ::die_bug("commandline empty");
return $self->{'commandline'}->replaced();
}
sub seq {
my $self = shift;
return $self->{'commandline'}->seq();
}
sub slot {
my $self = shift;
return $self->{'commandline'}->slot();
}
{
my($cattail);
sub cattail {
# Returns:
# $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
if(not $cattail) {
$cattail = q{
# cat followed by tail.
# If $writerpid dead: finish after this round
use Fcntl;
$|=1;
my ($cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
if($read_file) {
open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
} else {
*IN = *STDIN;
}
my $flags;
fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
$flags |= O_NONBLOCK; # Add non-blocking to the flags
fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
while(1) {
# clear EOF
seek(IN,0,1);
my $writer_running = kill 0, $writerpid;
$read = sysread(IN,$buf,32768);
if($read) {
# We can unlink the file now: The writer has written something
-e $unlink_file and unlink $unlink_file;
# Blocking print
while($buf) {
my $bytes_written = syswrite(OUT,$buf);
# syswrite may be interrupted by SIGHUP
substr($buf,0,$bytes_written) = "";
}
# Something printed: Wait less next time
$sleep /= 2;
} else {
if(eof(IN) and not $writer_running) {
# Writer dead: There will never be more to read => exit
exit;
}
# TODO This could probably be done more efficiently using select(2)
# Nothing read: Wait longer before next read
# Up to 30 milliseconds
$sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
usleep($sleep);
}
}
sub usleep {
# Sleep this many milliseconds.
my $secs = shift;
select(undef, undef, undef, $secs/1000);
}
};
$cattail =~ s/#.*//mg;
$cattail =~ s/\s+/ /g;
}
return $cattail;
}
}
sub openoutputfiles {
# Open files for STDOUT and STDERR
# Set file handles in $self->fh
my $self = shift;
my ($outfhw, $errfhw, $outname, $errname);
if($opt::results) {
my $args_as_dirname = $self->{'commandline'}->args_as_dirname();
# Output in: prefix/name1/val1/name2/val2/stdout
my $dir = $opt::results."/".$args_as_dirname;
if(eval{ File::Path::mkpath($dir); }) {
# OK
} else {
# mkpath failed: Argument probably too long.
# Set $Global::max_file_length, which will keep the individual
# dir names shorter than the max length
max_file_name_length($opt::results);
$args_as_dirname = $self->{'commandline'}->args_as_dirname();
# prefix/name1/val1/name2/val2/
$dir = $opt::results."/".$args_as_dirname;
File::Path::mkpath($dir);
}
# prefix/name1/val1/name2/val2/stdout
$outname = "$dir/stdout";
if(not open($outfhw, "+>", $outname)) {
::error("Cannot write to `$outname'.\n");
::wait_and_exit(255);
}
# prefix/name1/val1/name2/val2/stderr
$errname = "$dir/stderr";
if(not open($errfhw, "+>", $errname)) {
::error("Cannot write to `$errname'.\n");
::wait_and_exit(255);
}
$self->set_fh(1,"unlink","");
$self->set_fh(2,"unlink","");
} elsif(not $opt::ungroup) {
# To group we create temporary files for STDOUT and STDERR
# To avoid the cleanup unlink the files immediately (but keep them open)
if(@Global::tee_jobs) {
# files must be removed when the tee is done
} elsif($opt::files) {
($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
# --files => only remove stderr
$self->set_fh(1,"unlink","");
$self->set_fh(2,"unlink",$errname);
} else {
($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
$self->set_fh(1,"unlink",$outname);
$self->set_fh(2,"unlink",$errname);
}
} else {
# --ungroup
open($outfhw,">&",$Global::fd{1}) || die;
open($errfhw,">&",$Global::fd{2}) || die;
# File name must be empty as it will otherwise be printed
$outname = "";
$errname = "";
$self->set_fh(1,"unlink",$outname);
$self->set_fh(2,"unlink",$errname);
}
# Set writing FD
$self->set_fh(1,'w',$outfhw);
$self->set_fh(2,'w',$errfhw);
$self->set_fh(1,'name',$outname);
$self->set_fh(2,'name',$errname);
if($opt::compress) {
# Send stdout to stdin for $opt::compress_program(1)
# Send stderr to stdin for $opt::compress_program(2)
# cattail get pid: $pid = $self->fh($fdno,'rpid');
my $cattail = cattail();
for my $fdno (1,2) {
my $wpid = open(my $fdw,"|-","$opt::compress_program >>".
$self->fh($fdno,'name')) || die $?;
$self->set_fh($fdno,'w',$fdw);
$self->set_fh($fdno,'wpid',$wpid);
my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail,
$opt::decompress_program, $wpid,
$self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?;
$self->set_fh($fdno,'r',$fdr);
$self->set_fh($fdno,'rpid',$rpid);
}
} elsif(not $opt::ungroup) {
# Set reading FD if using --group (--ungroup does not need)
for my $fdno (1,2) {
# Re-open the file for reading
# so fdw can be closed separately
# and fdr can be seeked separately (for --line-buffer)
open(my $fdr,"<", $self->fh($fdno,'name')) ||
::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
$self->set_fh($fdno,'r',$fdr);
# Unlink if required
$Global::debug or unlink $self->fh($fdno,"unlink");
}
}
if($opt::linebuffer) {
# Set non-blocking when using --linebuffer
$Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
for my $fdno (1,2) {
my $fdr = $self->fh($fdno,'r');
my $flags;
fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
$flags |= &O_NONBLOCK; # Add non-blocking to the flags
fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
}
}
}
sub max_file_name_length {
# Figure out the max length of a subdir
# TODO and the max total length
# Ext4 = 255,130816
my $testdir = shift;
my $upper = 8_000_000;
my $len = 8;
my $dir="x"x$len;
do {
rmdir($testdir."/".$dir);
$len *= 16;
$dir="x"x$len;
} while (mkdir $testdir."/".$dir);
# Then search for the actual max length between $len/16 and $len
my $min = $len/16;
my $max = $len;
while($max-$min > 5) {
# If we are within 5 chars of the exact value:
# it is not worth the extra time to find the exact value
my $test = int(($min+$max)/2);
$dir="x"x$test;
if(mkdir $testdir."/".$dir) {
rmdir($testdir."/".$dir);
$min = $test;
} else {
$max = $test;
}
}
$Global::max_file_length = $min;
return $min;
}
sub set_fh {
# Set file handle
my ($self, $fd_no, $key, $fh) = @_;
$self->{'fd'}{$fd_no,$key} = $fh;
}
sub fh {
# Get file handle
my ($self, $fd_no, $key) = @_;
return $self->{'fd'}{$fd_no,$key};
}
sub write {
my $self = shift;
my $remaining_ref = shift;
my $stdin_fh = $self->fh(0,"w");
syswrite($stdin_fh,$$remaining_ref);
}
sub set_stdin_buffer {
# Copy stdin buffer from $block_ref up to $endpos
# Prepend with $header_ref
# Remove $recstart and $recend if needed
# Input:
# $header_ref = ref to $header to prepend
# $block_ref = ref to $block to pass on
# $endpos = length of $block to pass on
# $recstart = --recstart regexp
# $recend = --recend regexp
# Returns:
# N/A
my $self = shift;
my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_;
$self->{'stdin_buffer'} = ($self->virgin() ? $$header_ref : "").substr($$block_ref,0,$endpos);
if($opt::remove_rec_sep) {
remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend);
}
$self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'};
$self->{'stdin_buffer_pos'} = 0;
}
sub stdin_buffer_length {
my $self = shift;
return $self->{'stdin_buffer_length'};
}
sub remove_rec_sep {
my ($block_ref,$recstart,$recend) = @_;
# Remove record separator
$$block_ref =~ s/$recend$recstart//gos;
$$block_ref =~ s/^$recstart//os;
$$block_ref =~ s/$recend$//os;
}
sub non_block_write {
my $self = shift;
my $something_written = 0;
use POSIX qw(:errno_h);
# use Fcntl;
# my $flags = '';
for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) {
my $in = $self->fh(0,"w");
# fcntl($in, F_GETFL, $flags)
# or die "Couldn't get flags for HANDLE : $!\n";
# $flags |= O_NONBLOCK;
# fcntl($in, F_SETFL, $flags)
# or die "Couldn't set flags for HANDLE: $!\n";
my $rv = syswrite($in, $buf);
if (!defined($rv) && $! == EAGAIN) {
# would block
$something_written = 0;
} elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) {
# incomplete write
# Remove the written part
$self->{'stdin_buffer_pos'} += $rv;
$something_written = $rv;
} else {
# successfully wrote everything
my $a="";
$self->set_stdin_buffer(\$a,\$a,"","");
$something_written = $rv;
}
}
::debug("pipe", "Non-block: ", $something_written);
return $something_written;
}
sub virgin {
my $self = shift;
return $self->{'virgin'};
}
sub set_virgin {
my $self = shift;
$self->{'virgin'} = shift;
}
sub pid {
my $self = shift;
return $self->{'pid'};
}
sub set_pid {
my $self = shift;
$self->{'pid'} = shift;
}
sub starttime {
# Returns:
# UNIX-timestamp this job started
my $self = shift;
return sprintf("%.3f",$self->{'starttime'});
}
sub set_starttime {
my $self = shift;
my $starttime = shift || ::now();
$self->{'starttime'} = $starttime;
}
sub runtime {
# Returns:
# Run time in seconds
my $self = shift;
return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000);
}
sub endtime {
# Returns:
# UNIX-timestamp this job ended
# 0 if not ended yet
my $self = shift;
return ($self->{'endtime'} || 0);
}
sub set_endtime {
my $self = shift;
my $endtime = shift;
$self->{'endtime'} = $endtime;
}
sub timedout {
# Is the job timedout?
# Input:
# $delta_time = time that the job may run
# Returns:
# True or false
my $self = shift;
my $delta_time = shift;
return time > $self->{'starttime'} + $delta_time;
}
sub kill {
# Kill the job.
# Send the signals to (grand)*children and pid.
# If no signals: TERM TERM KILL
# Wait 200 ms after each TERM.
# Input:
# @signals = signals to send
my $self = shift;
my @signals = @_;
my @family_pids = $self->family_pids();
# Record this jobs as failed
$self->set_exitstatus(-1);
# Send two TERMs to give time to clean up
::debug("run", "Kill seq ", $self->seq(), "\n");
my @send_signals = @signals || ("TERM", "TERM", "KILL");
for my $signal (@send_signals) {
my $alive = 0;
for my $pid (@family_pids) {
if(kill 0, $pid) {
# The job still running
kill $signal, $pid;
$alive = 1;
}
}
# If a signal was given as input, do not do the sleep below
@signals and next;
if($signal eq "TERM" and $alive) {
# Wait up to 200 ms between TERMs - but only if any pids are alive
my $sleep = 1;
for (my $sleepsum = 0; kill 0, $family_pids[0] and $sleepsum < 200;
$sleepsum += $sleep) {
$sleep = ::reap_usleep($sleep);
}
}
}
}
sub family_pids {
# Find the pids with this->pid as (grand)*parent
# Returns:
# @pids = pids of (grand)*children
my $self = shift;
my $pid = $self->pid();
my @pids;
my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table();
my @more = ($pid);
# While more (grand)*children
while(@more) {
my @m;
push @pids, @more;
for my $parent (@more) {
if($children_of_ref->{$parent}) {
# add the children of this parent
push @m, @{$children_of_ref->{$parent}};
}
}
@more = @m;
}
return (@pids);
}
sub failed {
# return number of times failed for this $sshlogin
# Input:
# $sshlogin
# Returns:
# Number of times failed for $sshlogin
my $self = shift;
my $sshlogin = shift;
return $self->{'failed'}{$sshlogin};
}
sub failed_here {
# return number of times failed for the current $sshlogin
# Returns:
# Number of times failed for this sshlogin
my $self = shift;
return $self->{'failed'}{$self->sshlogin()};
}
sub add_failed {
# increase the number of times failed for this $sshlogin
my $self = shift;
my $sshlogin = shift;
$self->{'failed'}{$sshlogin}++;
}
sub add_failed_here {
# increase the number of times failed for the current $sshlogin
my $self = shift;
$self->{'failed'}{$self->sshlogin()}++;
}
sub reset_failed {
# increase the number of times failed for this $sshlogin
my $self = shift;
my $sshlogin = shift;
delete $self->{'failed'}{$sshlogin};
}
sub reset_failed_here {
# increase the number of times failed for this $sshlogin
my $self = shift;
delete $self->{'failed'}{$self->sshlogin()};
}
sub min_failed {
# Returns:
# the number of sshlogins this command has failed on
# the minimal number of times this command has failed
my $self = shift;
my $min_failures =
::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
return ($number_of_sshlogins_failed_on,$min_failures);
}
sub total_failed {
# Returns:
# $total_failures = the number of times this command has failed
my $self = shift;
my $total_failures = 0;
for (values %{$self->{'failed'}}) {
$total_failures += $_;
}
return $total_failures;
}
sub wrapped {
# Wrap command with:
# * --shellquote
# * --nice
# * --cat
# * --fifo
# * --sshlogin
# * --pipepart (@Global::cat_partials)
# * --pipe
# * --tmux
# The ordering of the wrapping is important:
# * --nice/--cat/--fifo should be done on the remote machine
# * --pipepart/--pipe should be done on the local machine inside --tmux
# Uses:
# $Global::envvar
# $opt::shellquote
# $opt::nice
# $Global::shell
# $opt::cat
# $opt::fifo
# @Global::cat_partials
# $opt::pipe
# $opt::tmux
# Returns:
# $self->{'wrapped'} = the command wrapped with the above
my $self = shift;
if(not defined $self->{'wrapped'}) {
my $command = $Global::envvar.$self->replaced();
if($opt::shellquote) {
# Prepend echo
# and quote twice
$command = "echo " .
::shell_quote_scalar(::shell_quote_scalar($command));
}
if($opt::nice) {
# Prepend \nice -n19 $SHELL -c
# and quote.
# The '\' before nice is needed to avoid tcsh's built-in
$command = '\nice'. " -n". $opt::nice. " ".
$Global::shell. " -c ".
::shell_quote_scalar($command);
}
if($opt::cat) {
# Prepend 'cat > {};'
# Append '_EXIT=$?;(rm {};exit $_EXIT)'
$command =
$self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0).
$command.
$self->{'commandline'}->replace_placeholders(
["; _EXIT=\$?; rm \257<\257>; exit \$_EXIT"], 0, 0);
} elsif($opt::fifo) {
# Prepend 'mkfifo {}; ('
# Append ') & _PID=$!; cat > {}; wait $_PID; _EXIT=$?;(rm {};exit $_EXIT)'
$command =
$self->{'commandline'}->replace_placeholders(["mkfifo \257<\257>; ("], 0, 0).
$command.
$self->{'commandline'}->replace_placeholders([") & _PID=\$!; cat > \257<\257>; ",
"wait \$_PID; _EXIT=\$?; ",
"rm \257<\257>; exit \$_EXIT"],
0,0);
}
# Wrap with ssh + tranferring of files
$command = $self->sshlogin_wrap($command);
if(@Global::cat_partials) {
# Prepend:
# < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 11 |
$command = (shift @Global::cat_partials). "|". "(". $command. ")";
} elsif($opt::pipe) {
# Prepend EOF-detector to avoid starting $command if EOF.
# The $tmpfile might exist if run on a remote system - we accept that risk
my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr");
# Unlink to avoid leaving files if --dry-run or --sshlogin
unlink $tmpfile;
$command =
# Exit value:
# empty input = true
# some input = exit val from command
qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }.
qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }.
qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }.
"($command);";
}
if($opt::tmux) {
# Wrap command with 'tmux'
$command = $self->tmux_wrap($command);
}
$self->{'wrapped'} = $command;
}
return $self->{'wrapped'};
}
sub set_sshlogin {
my $self = shift;
my $sshlogin = shift;
$self->{'sshlogin'} = $sshlogin;
delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
delete $self->{'wrapped'};
}
sub sshlogin {
my $self = shift;
return $self->{'sshlogin'};
}
sub sshlogin_wrap {
# Wrap the command with the commands needed to run remotely
# Returns:
# $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
my $self = shift;
my $command = shift;
if(not defined $self->{'sshlogin_wrap'}) {
my $sshlogin = $self->sshlogin();
my $sshcmd = $sshlogin->sshcommand();
my $serverlogin = $sshlogin->serverlogin();
my ($pre,$post,$cleanup)=("","","");
if($serverlogin eq ":") {
# No transfer neeeded
$self->{'sshlogin_wrap'} = $command;
} else {
# --transfer
$pre .= $self->sshtransfer();
# --return
$post .= $self->sshreturn();
# --cleanup
$post .= $self->sshcleanup();
if($post) {
# We need to save the exit status of the job
$post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;';
}
# If the remote login shell is (t)csh then use 'setenv'
# otherwise use 'export'
# We cannot use parse_env_var(), as PARALLEL_SEQ changes
# for each command
my $parallel_env =
($Global::envwarn
. q{ 'eval `echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null }
. q{ && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; }
. q{ setenv PARALLEL_PID '$PARALLEL_PID' }
. q{ || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; }
. q{ PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' });
my $remote_pre = "";
my $ssh_options = "";
if(($opt::pipe or $opt::pipepart) and $opt::ctrlc
or
not ($opt::pipe or $opt::pipepart) and not $opt::noctrlc) {
# TODO Determine if this is needed
# Propagating CTRL-C to kill remote jobs requires
# remote jobs to be run with a terminal.
$ssh_options = "-tt -oLogLevel=quiet";
# $ssh_options = "";
# tty - check if we have a tty.
# stty:
# -onlcr - make output 8-bit clean
# isig - pass CTRL-C as signal
# -echo - do not echo input
$remote_pre .= ::shell_quote_scalar('tty >/dev/null && stty isig -onlcr -echo;');
}
if($opt::workdir) {
my $wd = ::shell_quote_file($self->workdir());
$remote_pre .= ::shell_quote_scalar("mkdir -p ") . $wd .
::shell_quote_scalar("; cd ") . $wd .
# exit 255 (instead of exec false) would be the correct thing,
# but that fails on tcsh
::shell_quote_scalar(qq{ || exec false;});
}
# This script is to solve the problem of
# * not mixing STDERR and STDOUT
# * terminating with ctrl-c
# It works on Linux but not Solaris
# Finishes on Solaris, but wrong exit code:
# $SIG{CHLD} = sub {exit ($?&127 ? 128+($?&127) : 1+$?>>8)};
# Hangs on Solaris, but correct exit code on Linux:
# $SIG{CHLD} = sub { $done = 1 };
# $p->poll;
my $signal_script = "perl -e '".
q{
use IO::Poll;
$SIG{CHLD} = sub { $done = 1 };
$p = IO::Poll->new;
$p->mask(STDOUT, POLLHUP);
$pid=fork; unless($pid) {setpgrp; exec $ENV{SHELL}, "-c", @ARGV; die "exec: $!\n"}
$p->poll;
kill SIGHUP, -${pid} unless $done;
wait; exit ($?&127 ? 128+($?&127) : 1+$?>>8)
} . "' ";
$signal_script =~ s/\s+/ /g;
$self->{'sshlogin_wrap'} =
($pre
. "$sshcmd $ssh_options $serverlogin $parallel_env "
. $remote_pre
# . ::shell_quote_scalar($signal_script . ::shell_quote_scalar($command))
. ::shell_quote_scalar($command)
. ";"
. $post);
}
}
return $self->{'sshlogin_wrap'};
}
sub transfer {
# Files to transfer
# Returns:
# @transfer - File names of files to transfer
my $self = shift;
my @transfer = ();
$self->{'transfersize'} = 0;
if($opt::transfer) {
for my $record (@{$self->{'commandline'}{'arg_list'}}) {
# Merge arguments from records into args
for my $arg (@$record) {
CORE::push @transfer, $arg->orig();
# filesize
if(-e $arg->orig()) {
$self->{'transfersize'} += (stat($arg->orig()))[7];
}
}
}
}
return @transfer;
}
sub transfersize {
my $self = shift;
return $self->{'transfersize'};
}
sub sshtransfer {
# Returns for each transfer file:
# rsync $file remote:$workdir
my $self = shift;
my @pre;
my $sshlogin = $self->sshlogin();
my $workdir = $self->workdir();
for my $file ($self->transfer()) {
push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
}
return join("",@pre);
}
sub return {
# Files to return
# Non-quoted and with {...} substituted
# Returns:
# @non_quoted_filenames
my $self = shift;
return $self->{'commandline'}->
replace_placeholders($self->{'commandline'}{'return_files'},0,0);
}
sub returnsize {
# This is called after the job has finished
# Returns:
# $number_of_bytes transferred in return
my $self = shift;
for my $file ($self->return()) {
if(-e $file) {
$self->{'returnsize'} += (stat($file))[7];
}
}
return $self->{'returnsize'};
}
sub sshreturn {
# Returns for each return-file:
# rsync remote:$workdir/$file .
my $self = shift;
my $sshlogin = $self->sshlogin();
my $sshcmd = $sshlogin->sshcommand();
my $serverlogin = $sshlogin->serverlogin();
my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd);
my $pre = "";
for my $file ($self->return()) {
$file =~ s:^\./::g; # Remove ./ if any
my $relpath = ($file !~ m:^/:); # Is the path relative?
my $cd = "";
my $wd = "";
if($relpath) {
# rsync -avR /foo/./bar/baz.c remote:/tmp/
# == (on old systems)
# rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
$wd = ::shell_quote_file($self->workdir()."/");
}
# Only load File::Basename if actually needed
$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
# dir/./file means relative to dir, so remove dir on remote
$file =~ m:(.*)/\./:;
my $basedir = $1 ? ::shell_quote_file($1."/") : "";
my $nobasedir = $file;
$nobasedir =~ s:.*/\./::;
$cd = ::shell_quote_file(::dirname($nobasedir));
my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync");
my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file)));
# --return
# mkdir -p /home/tange/dir/subdir/;
# rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync"
# server:file.gz /home/tange/dir/subdir/
$pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:".
$basename . " ".$basedir.$cd.";";
}
return $pre;
}
sub sshcleanup {
# Return the sshcommand needed to remove the file
# Returns:
# ssh command needed to remove files from sshlogin
my $self = shift;
my $sshlogin = $self->sshlogin();
my $sshcmd = $sshlogin->sshcommand();
my $serverlogin = $sshlogin->serverlogin();
my $workdir = $self->workdir();
my $cleancmd = "";
for my $file ($self->cleanup()) {
my @subworkdirs = parentdirs_of($file);
$cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
}
if(defined $opt::workdir and $opt::workdir eq "...") {
$cleancmd .= "$sshcmd $serverlogin rm -rf " . ::shell_quote_scalar($workdir).';';
}
return $cleancmd;
}
sub cleanup {
# Returns:
# Files to remove at cleanup
my $self = shift;
if($opt::cleanup) {
my @transfer = $self->transfer();
my @return = $self->return();
return (@transfer,@return);
} else {
return ();
}
}
sub workdir {
# Returns:
# the workdir on a remote machine
my $self = shift;
if(not defined $self->{'workdir'}) {
my $workdir;
if(defined $opt::workdir) {
if($opt::workdir eq ".") {
# . means current dir
my $home = $ENV{'HOME'};
eval 'use Cwd';
my $cwd = cwd();
$workdir = $cwd;
if($home) {
# If homedir exists: remove the homedir from
# workdir if cwd starts with homedir
# E.g. /home/foo/my/dir => my/dir
# E.g. /tmp/my/dir => /tmp/my/dir
my ($home_dev, $home_ino) = (stat($home))[0,1];
my $parent = "";
my @dir_parts = split(m:/:,$cwd);
my $part;
while(defined ($part = shift @dir_parts)) {
$part eq "" and next;
$parent .= "/".$part;
my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
if($parent_dev == $home_dev and $parent_ino == $home_ino) {
# dev and ino is the same: We found the homedir.
$workdir = join("/",@dir_parts);
last;
}
}
}
if($workdir eq "") {
$workdir = ".";
}
} elsif($opt::workdir eq "...") {
$workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
. "-" . $self->seq();
} else {
$workdir = $opt::workdir;
# Rsync treats /./ special. We don't want that
$workdir =~ s:/\./:/:g; # Remove /./
$workdir =~ s:/+$::; # Remove ending / if any
$workdir =~ s:^\./::g; # Remove starting ./ if any
}
} else {
$workdir = ".";
}
$self->{'workdir'} = ::shell_quote_scalar($workdir);
}
return $self->{'workdir'};
}
sub parentdirs_of {
# Return:
# all parentdirs except . of this dir or file - sorted desc by length
my $d = shift;
my @parents = ();
while($d =~ s:/[^/]+$::) {
if($d ne ".") {
push @parents, $d;
}
}
return @parents;
}
sub start {
# Setup STDOUT and STDERR for a job and start it.
# Returns:
# job-object or undef if job not to run
my $job = shift;
# Get the shell command to be executed (possibly with ssh infront).
my $command = $job->wrapped();
if($Global::interactive or $Global::stderr_verbose) {
if($Global::interactive) {
print $Global::original_stderr "$command ?...";
open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
my $answer = <$tty_fh>;
close $tty_fh;
my $run_yes = ($answer =~ /^\s*y/i);
if (not $run_yes) {
$command = "true"; # Run the command 'true'
}
} else {
print $Global::original_stderr "$command\n";
}
}
my $pid;
$job->openoutputfiles();
my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
local (*IN,*OUT,*ERR);
open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!");
open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDOUT: $!");
if(($opt::dryrun or $Global::verbose) and $opt::ungroup) {
if($Global::verbose <= 1) {
print $stdout_fh $job->replaced(),"\n";
} else {
# Verbose level > 1: Print the rsync and stuff
print $stdout_fh $command,"\n";
}
}
if($opt::dryrun) {
$command = "true";
}
$ENV{'PARALLEL_SEQ'} = $job->seq();
$ENV{'PARALLEL_PID'} = $$;
::debug("run", $Global::total_running, " processes . Starting (",
$job->seq(), "): $command\n");
if($opt::pipe) {
my ($stdin_fh);
# The eval is needed to catch exception from open3
eval {
$pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
::die_bug("open3-pipe");
1;
};
$job->set_fh(0,"w",$stdin_fh);
} elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1
and $job->sshlogin()->string() eq ":") {
# Give STDIN to the first job if using -a (but only if running
# locally - otherwise CTRL-C does not work for other jobs Bug#36585)
*IN = *STDIN;
# The eval is needed to catch exception from open3
eval {
$pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
::die_bug("open3-a");
1;
};
# Re-open to avoid complaining
open(STDIN, "<&", $Global::original_stdin)
or ::die_bug("dup-\$Global::original_stdin: $!");
} elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and
open(my $devtty_fh, "<", "/dev/tty")) {
# Give /dev/tty to the command if no one else is using it
*IN = $devtty_fh;
# The eval is needed to catch exception from open3
eval {
$pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
::die_bug("open3-/dev/tty");
$Global::tty_taken = $pid;
close $devtty_fh;
1;
};
} else {
# The eval is needed to catch exception from open3
eval {
$pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
::die_bug("open3-gensym");
1;
};
}
if($pid) {
# A job was started
$Global::total_running++;
$Global::total_started++;
$job->set_pid($pid);
$job->set_starttime();
$Global::running{$job->pid()} = $job;
if($opt::timeout) {
$Global::timeoutq->insert($job);
}
$Global::newest_job = $job;
$Global::newest_starttime = ::now();
return $job;
} else {
# No more processes
::debug("run", "Cannot spawn more jobs.\n");
return undef;
}
}
sub tmux_wrap {
# Wrap command with tmux for session pPID
# Input:
# $actual_command = the actual command being run (incl ssh wrap)
my $self = shift;
my $actual_command = shift;
# Temporary file name. Used for fifo to communicate exit val
my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".tmx");
$Global::unlink{$tmpfile}=1;
close $fh;
unlink $tmpfile;
my $visual_command = $self->replaced();
my $title = $visual_command;
# ; causes problems
# ascii 194-245 annoys tmux
$title =~ tr/[\011-\016;\302-\365]//d;
my $tmux;
if($Global::total_running == 0) {
$tmux = "tmux new-session -s p$$ -d -n ".
::shell_quote_scalar($title);
print $Global::original_stderr "See output with: tmux attach -t p$$\n";
} else {
$tmux = "tmux new-window -t p$$ -n ".::shell_quote_scalar($title);
}
return "mkfifo $tmpfile; $tmux ".
# Run in tmux
::shell_quote_scalar(
"(".$actual_command.');(echo $?$status;echo 255) >'.$tmpfile."&".
"echo ".::shell_quote_scalar($visual_command).";".
"echo \007Job finished at: `date`;sleep 10").
# Run outside tmux
# Read the first line from the fifo and use that as status code
"; exit `perl -ne 'unlink \$ARGV; 1..1 and print' $tmpfile` ";
}
sub is_already_in_results {
# Do we already have results for this job?
# Returns:
# $job_already_run = bool whether there is output for this or not
my $job = $_[0];
my $args_as_dirname = $job->{'commandline'}->args_as_dirname();
# prefix/name1/val1/name2/val2/
my $dir = $opt::results."/".$args_as_dirname;
::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n");
return -e "$dir/stdout";
}
sub is_already_in_joblog {
my $job = shift;
return vec($Global::job_already_run,$job->seq(),1);
}
sub set_job_in_joblog {
my $job = shift;
vec($Global::job_already_run,$job->seq(),1) = 1;
}
sub should_be_retried {
# Should this job be retried?
# Returns
# 0 - do not retry
# 1 - job queued for retry
my $self = shift;
if (not $opt::retries) {
return 0;
}
if(not $self->exitstatus()) {
# Completed with success. If there is a recorded failure: forget it
$self->reset_failed_here();
return 0
} else {
# The job failed. Should it be retried?
$self->add_failed_here();
if($self->total_failed() == $opt::retries) {
# This has been retried enough
return 0;
} else {
# This command should be retried
$self->set_endtime(undef);
$Global::JobQueue->unget($self);
::debug("run", "Retry ", $self->seq(), "\n");
return 1;
}
}
}
sub print {
# Print the output of the jobs
# Returns: N/A
my $self = shift;
::debug("print", ">>joboutput ", $self->replaced(), "\n");
if($opt::dryrun) {
# Nothing was printed to this job:
# cleanup tmp files if --files was set
unlink $self->fh(1,"name");
}
if($opt::pipe and $self->virgin()) {
# Skip --joblog, --dryrun, --verbose
} else {
if($Global::joblog and defined $self->{'exitstatus'}) {
# Add to joblog when finished
$self->print_joblog();
}
# Printing is only relevant for grouped/--line-buffer output.
$opt::ungroup and return;
# Check for disk full
exit_if_disk_full();
if(($opt::dryrun or $Global::verbose)
and
not $self->{'verbose_printed'}) {
$self->{'verbose_printed'}++;
if($Global::verbose <= 1) {
print STDOUT $self->replaced(),"\n";
} else {
# Verbose level > 1: Print the rsync and stuff
print STDOUT $self->wrapped(),"\n";
}
# If STDOUT and STDERR are merged,
# we want the command to be printed first
# so flush to avoid STDOUT being buffered
flush STDOUT;
}
}
for my $fdno (sort { $a <=> $b } keys %Global::fd) {
# Sort by file descriptor numerically: 1,2,3,..,9,10,11
$fdno == 0 and next;
my $out_fd = $Global::fd{$fdno};
my $in_fh = $self->fh($fdno,"r");
if(not $in_fh) {
if(not $Job::file_descriptor_warning_printed{$fdno}++) {
# ::warning("File descriptor $fdno not defined\n");
}
next;
}
::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):");
if($opt::files) {
# If --compress: $in_fh must be closed first.
close $self->fh($fdno,"w");
close $in_fh;
if($opt::pipe and $self->virgin()) {
# Nothing was printed to this job:
# cleanup unused tmp files if --files was set
for my $fdno (1,2) {
unlink $self->fh($fdno,"name");
unlink $self->fh($fdno,"unlink");
}
} elsif($fdno == 1 and $self->fh($fdno,"name")) {
print $out_fd $self->fh($fdno,"name"),"\n";
}
} elsif($opt::linebuffer) {
# Line buffered print out
$self->linebuffer_print($fdno,$in_fh,$out_fd);
} else {
my $buf;
close $self->fh($fdno,"w");
seek $in_fh, 0, 0;
# $in_fh is now ready for reading at position 0
if($opt::tag or defined $opt::tagstring) {
my $tag = $self->tag();
if($fdno == 2) {
# OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
# This is a crappy way of ignoring it.
while(<$in_fh>) {
if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) {
# Skip
} else {
print $out_fd $tag,$_;
}
# At most run the loop once
last;
}
}
while(<$in_fh>) {
print $out_fd $tag,$_;
}
} else {
my $buf;
if($fdno == 2) {
# OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
# This is a crappy way of ignoring it.
sysread($in_fh,$buf,1_000);
$buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//;
print $out_fd $buf;
}
while(sysread($in_fh,$buf,32768)) {
print $out_fd $buf;
}
}
close $in_fh;
}
flush $out_fd;
}
::debug("print", "<<joboutput @command\n");
}
sub linebuffer_print {
my $self = shift;
my ($fdno,$in_fh,$out_fd) = @_;
my $partial = \$self->{'partial_line',$fdno};
if(defined $self->{'exitstatus'}) {
# If the job is dead: close printing fh. Needed for --compress
close $self->fh($fdno,"w");
if($opt::compress) {
# Blocked reading in final round
$Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
for my $fdno (1,2) {
my $fdr = $self->fh($fdno,'r');
my $flags;
fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
$flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags
fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
}
}
}
# This seek will clear EOF
seek $in_fh, tell($in_fh), 0;
# The read is non-blocking: The $in_fh is set to non-blocking.
# 32768 --tag = 5.1s
# 327680 --tag = 4.4s
# 1024000 --tag = 4.4s
# 3276800 --tag = 4.3s
# 32768000 --tag = 4.7s
# 10240000 --tag = 4.3s
while(read($in_fh,substr($$partial,length $$partial),3276800)) {
# Append to $$partial
# Find the last \n
my $i = rindex($$partial,"\n");
if($i != -1) {
# One or more complete lines were found
if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) {
# OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
# This is a crappy way of ignoring it.
$$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//;
# Length of partial line has changed: Find the last \n again
$i = rindex($$partial,"\n");
}
if($opt::tag or defined $opt::tagstring) {
# Replace ^ with $tag within the full line
my $tag = $self->tag();
substr($$partial,0,$i+1) =~ s/^/$tag/gm;
# Length of partial line has changed: Find the last \n again
$i = rindex($$partial,"\n");
}
# Print up to and including the last \n
print $out_fd substr($$partial,0,$i+1);
# Remove the printed part
substr($$partial,0,$i+1)="";
}
}
if(defined $self->{'exitstatus'}) {
# If the job is dead: print the remaining partial line
# read remaining
if($$partial and ($opt::tag or defined $opt::tagstring)) {
my $tag = $self->tag();
$$partial =~ s/^/$tag/gm;
}
print $out_fd $$partial;
# Release the memory
$$partial = undef;
if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) {
# decompress still running
} else {
# decompress done: close fh
close $in_fh;
}
}
}
sub print_joblog {
my $self = shift;
my $cmd;
if($Global::verbose <= 1) {
$cmd = $self->replaced();
} else {
# Verbose level > 1: Print the rsync and stuff
$cmd = "@command";
}
print $Global::joblog
join("\t", $self->seq(), $self->sshlogin()->string(),
$self->starttime(), sprintf("%10.3f",$self->runtime()),
$self->transfersize(), $self->returnsize(),
$self->exitstatus(), $self->exitsignal(), $cmd
). "\n";
flush $Global::joblog;
$self->set_job_in_joblog();
}
sub tag {
my $self = shift;
if(not defined $self->{'tag'}) {
$self->{'tag'} = $self->{'commandline'}->
replace_placeholders([$opt::tagstring],0,0)."\t";
}
return $self->{'tag'};
}
sub hostgroups {
my $self = shift;
if(not defined $self->{'hostgroups'}) {
$self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
}
return @{$self->{'hostgroups'}};
}
sub exitstatus {
my $self = shift;
return $self->{'exitstatus'};
}
sub set_exitstatus {
my $self = shift;
my $exitstatus = shift;
if($exitstatus) {
# Overwrite status if non-zero
$self->{'exitstatus'} = $exitstatus;
} else {
# Set status but do not overwrite
# Status may have been set by --timeout
$self->{'exitstatus'} ||= $exitstatus;
}
}
sub exitsignal {
my $self = shift;
return $self->{'exitsignal'};
}
sub set_exitsignal {
my $self = shift;
my $exitsignal = shift;
$self->{'exitsignal'} = $exitsignal;
}
{
my ($disk_full_fh, $b8193, $name);
sub exit_if_disk_full {
# Checks if $TMPDIR is full by writing 8kb to a tmpfile
# If the disk is full: Exit immediately.
# Returns:
# N/A
if(not $disk_full_fh) {
($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df");
unlink $name;
$b8193 = "x"x8193;
}
# Linux does not discover if a disk is full if writing <= 8192
# Tested on:
# bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
# ntfs reiserfs tmpfs ubifs vfat xfs
# TODO this should be tested on different OS similar to this:
#
# doit() {
# sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
# seq 100000 | parallel --tmpdir /mnt/loop/ true &
# seq 6900000 > /mnt/loop/i && echo seq OK
# seq 6980868 > /mnt/loop/i
# seq 10000 > /mnt/loop/ii
# sleep 3
# sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
# echo >&2
# }
print $disk_full_fh $b8193;
if(not $disk_full_fh
or
tell $disk_full_fh == 0) {
::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n");
::error("Change \$TMPDIR with --tmpdir or use --compress.\n");
::wait_and_exit(255);
}
truncate $disk_full_fh, 0;
seek($disk_full_fh, 0, 0) || die;
}
}
package CommandLine;
sub new {
my $class = shift;
my $seq = shift;
my $commandref = shift;
$commandref || die;
my $arg_queue = shift;
my $context_replace = shift;
my $max_number_of_args = shift; # for -N and normal (-n1)
my $return_files = shift;
my $replacecount_ref = shift;
my $len_ref = shift;
my %replacecount = %$replacecount_ref;
my %len = %$len_ref;
for (keys %$replacecount_ref) {
# Total length of this replacement string {} replaced with all args
$len{$_} = 0;
}
return bless {
'command' => $commandref,
'seq' => $seq,
'len' => \%len,
'arg_list' => [],
'arg_queue' => $arg_queue,
'max_number_of_args' => $max_number_of_args,
'replacecount' => \%replacecount,
'context_replace' => $context_replace,
'return_files' => $return_files,
'replaced' => undef,
}, ref($class) || $class;
}
sub seq {
my $self = shift;
return $self->{'seq'};
}
{
my $max_slot_number;
sub slot {
# Find the number of a free job slot and return it
# Uses:
# @Global::slots
# Returns:
# $jobslot = number of jobslot
my $self = shift;
if(not $self->{'slot'}) {
if(not @Global::slots) {
# $Global::max_slot_number will typically be $Global::max_jobs_running
push @Global::slots, ++$max_slot_number;
}
$self->{'slot'} = shift @Global::slots;
}
return $self->{'slot'};
}
}
sub populate {
# Add arguments from arg_queue until the number of arguments or
# max line length is reached
# Uses:
# $Global::minimal_command_line_length
# $opt::cat
# $opt::fifo
# $Global::JobQueue
# $opt::m
# $opt::X
# $CommandLine::already_spread
# $Global::max_jobs_running
# Returns: N/A
my $self = shift;
my $next_arg;
my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length();
if($opt::cat or $opt::fifo) {
# Generate a tempfile name that will be used as {}
my($outfh,$name) = ::tmpfile(SUFFIX => ".pip");
close $outfh;
# Unlink is needed if: ssh otheruser@localhost
unlink $name;
$Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget([Arg->new($name)]);
}
while (not $self->{'arg_queue'}->empty()) {
$next_arg = $self->{'arg_queue'}->get();
if(not defined $next_arg) {
next;
}
$self->push($next_arg);
if($self->len() >= $max_len) {
# Command length is now > max_length
# If there are arguments: remove the last
# If there are no arguments: Error
# TODO stuff about -x opt_x
if($self->number_of_args() > 1) {
# There is something to work on
$self->{'arg_queue'}->unget($self->pop());
last;
} else {
my $args = join(" ", map { $_->orig() } @$next_arg);
::error("Command line too long (",
$self->len(), " >= ",
$max_len,
") at number ",
$self->{'arg_queue'}->arg_number(),
": ".
(substr($args,0,50))."...\n");
$self->{'arg_queue'}->unget($self->pop());
::wait_and_exit(255);
}
}
if(defined $self->{'max_number_of_args'}) {
if($self->number_of_args() >= $self->{'max_number_of_args'}) {
last;
}
}
}
if(($opt::m or $opt::X) and not $CommandLine::already_spread
and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
# -m or -X and EOF => Spread the arguments over all jobslots
# (unless they are already spread)
$CommandLine::already_spread ||= 1;
if($self->number_of_args() > 1) {
$self->{'max_number_of_args'} =
::ceil($self->number_of_args()/$Global::max_jobs_running);
$Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
$self->{'max_number_of_args'};
$self->{'arg_queue'}->unget($self->pop_all());
while($self->number_of_args() < $self->{'max_number_of_args'}) {
$self->push($self->{'arg_queue'}->get());
}
}
}
}
sub push {
# Add one or more records as arguments
# Returns: N/A
my $self = shift;
my $record = shift;
push @{$self->{'arg_list'}}, $record;
my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
my $rep;
for my $arg (@$record) {
if(defined $arg) {
for my $perlexpr (keys %{$self->{'replacecount'}}) {
# 50% faster than below
$self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self);
# $rep = $arg->replace($perlexpr,$quote_arg,$self);
# $self->{'len'}{$perlexpr} += length $rep;
# ::debug("length", "Length: ", length $rep,
# "(", $perlexpr, "=>", $rep, ")\n");
}
}
}
}
sub pop {
# Remove last argument
# Returns:
# the last record
my $self = shift;
my $record = pop @{$self->{'arg_list'}};
my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
for my $arg (@$record) {
if(defined $arg) {
for my $perlexpr (keys %{$self->{'replacecount'}}) {
$self->{'len'}{$perlexpr} -=
length $arg->replace($perlexpr,$quote_arg,$self);
}
}
}
return $record;
}
sub pop_all {
# Remove all arguments and zeros the length of replacement strings
# Returns:
# all records
my $self = shift;
my @popped = @{$self->{'arg_list'}};
for my $replacement_string (keys %{$self->{'replacecount'}}) {
$self->{'len'}{$replacement_string} = 0;
}
$self->{'arg_list'} = [];
return @popped;
}
sub number_of_args {
# The number of records
# Returns:
# number of records
my $self = shift;
# Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd
# Mdzrvadp Nvmdymeaz az 2011-04-10. Ftue oaawuq dqxqmeqp az
# 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq
# qymux oaawuq@fmzsq.pw itqz kag dqmp ftue.
#
# U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG
# Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq
# oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq
# eagdoq oapq.
#
# Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz
# ftqk dqmp ftue. Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq
# ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq
# eagdoq oapq qcguhmxqzf fa m pgefk oadzqd. Fa ymwq egdq ftq
# oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq
# eagdoq oapq U daf13'qp ftq eagdoq oapq
# tffb://qz.iuwubqpum.ads/iuwu/DAF13
#
# 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk
# ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq
# tmp fa nq daf13'qp.
#
# Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita
# mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq
# oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz.
#
# This is really the number of records
return $#{$self->{'arg_list'}}+1;
}
sub number_of_recargs {
# The number of args in records
# Returns:
# number of args records
my $self = shift;
my $sum = 0;
my $nrec = scalar @{$self->{'arg_list'}};
if($nrec) {
$sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
}
return $sum;
}
sub args_as_string {
# Returns:
# all unmodified arguments joined with ' ' (similar to {})
my $self = shift;
return (join " ", map { $_->orig() }
map { @$_ } @{$self->{'arg_list'}});
}
sub args_as_dirname {
# Returns:
# all unmodified arguments joined with '/' (similar to {})
# \t \0 \\ and / are quoted as: \t \0 \\ \_
# If $Global::max_file_length: Keep subdirs < $Global::max_file_length
my $self = shift;
my @res = ();
for my $rec_ref (@{$self->{'arg_list'}}) {
# If headers are used, sort by them.
# Otherwise keep the order from the command line.
my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
for my $n (@header_indexes_sorted) {
CORE::push(@res,
$Global::input_source_header{$n},
map { my $s = $_;
# \t \0 \\ and / are quoted as: \t \0 \\ \_
$s =~ s/\\/\\\\/g;
$s =~ s/\t/\\t/g;
$s =~ s/\0/\\0/g;
$s =~ s:/:\\_:g;
if($Global::max_file_length) {
# Keep each subdir shorter than the longest
# allowed file name
$s = substr($s,0,$Global::max_file_length);
}
$s; }
$rec_ref->[$n-1]->orig());
}
}
return join "/", @res;
}
sub header_indexes_sorted {
# Sort headers first by number then by name.
# E.g.: 1a 1b 11a 11b
# Returns:
# Indexes of %Global::input_source_header sorted
my $max_col = shift;
no warnings 'numeric';
for my $col (1 .. $max_col) {
# Make sure the header is defined. If it is not: use column number
if(not defined $Global::input_source_header{$col}) {
$Global::input_source_header{$col} = $col;
}
}
my @header_indexes_sorted = sort {
# Sort headers numerically then asciibetically
$Global::input_source_header{$a} <=> $Global::input_source_header{$b}
or
$Global::input_source_header{$a} cmp $Global::input_source_header{$b}
} 1 .. $max_col;
return @header_indexes_sorted;
}
sub len {
# Uses:
# $opt::shellquote
# The length of the command line with args substituted
my $self = shift;
my $len = 0;
# Add length of the original command with no args
# Length of command w/ all replacement args removed
$len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
::debug("length", "noncontext + command: $len\n");
my $recargs = $self->number_of_recargs();
if($self->{'context_replace'}) {
# Context is duplicated for each arg
$len += $recargs * $self->{'len'}{'context'};
for my $replstring (keys %{$self->{'replacecount'}}) {
# If the replacements string is more than once: mulitply its length
$len += $self->{'len'}{$replstring} *
$self->{'replacecount'}{$replstring};
::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
$self->{'replacecount'}{$replstring}, "\n");
}
# echo 11 22 33 44 55 66 77 88 99 1010
# echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
# 5 + ctxgrp*arg
::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
" Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
# Add space between context groups
$len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
} else {
# Each replacement string may occur several times
# Add the length for each time
$len += 1*$self->{'len'}{'context'};
::debug("length", "context+noncontext + command: $len\n");
for my $replstring (keys %{$self->{'replacecount'}}) {
# (space between regargs + length of replacement)
# * number this replacement is used
$len += ($recargs -1 + $self->{'len'}{$replstring}) *
$self->{'replacecount'}{$replstring};
}
}
if($opt::nice) {
# Pessimistic length if --nice is set
# Worse than worst case: every char needs to be quoted with \
$len *= 2;
}
if($Global::quoting) {
# Pessimistic length if -q is set
# Worse than worst case: every char needs to be quoted with \
$len *= 2;
}
if($opt::shellquote) {
# Pessimistic length if --shellquote is set
# Worse than worst case: every char needs to be quoted with \ twice
$len *= 4;
}
# If we are using --env, add the prefix for that, too.
$len += $Global::envvarlen;
return $len;
}
sub replaced {
# Uses:
# $Global::noquote
# $Global::quoting
# Returns:
# $replaced = command with place holders replaced and prepended
my $self = shift;
if(not defined $self->{'replaced'}) {
# Don't quote arguments if the input is the full command line
my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
$self->{'replaced'} = $self->replace_placeholders($self->{'command'},$Global::quoting,$quote_arg);
my $len = length $self->{'replaced'};
if ($len != $self->len()) {
::debug("length", $len, " != ", $self->len(), " ", $self->{'replaced'}, "\n");
} else {
::debug("length", $len, " == ", $self->len(), " ", $self->{'replaced'}, "\n");
}
}
return $self->{'replaced'};
}
sub replace_placeholders {
# Replace foo{}bar with fooargbar
# Input:
# $targetref = command as shell words
# $quote = should everything be quoted?
# $quote_arg = should replaced arguments be quoted?
# Returns:
# @target with placeholders replaced
my $self = shift;
my $targetref = shift;
my $quote = shift;
my $quote_arg = shift;
my $context_replace = $self->{'context_replace'};
my @target = @$targetref;
::debug("replace", "Replace @target\n");
# -X = context replace
# maybe multiple input sources
# maybe --xapply
if(not @target) {
# @target is empty: Return empty array
return @target;
}
# Fish out the words that have replacement strings in them
my %word;
for (@target) {
my $tt = $_;
::debug("replace", "Target: $tt");
# a{1}b{}c{}d
# a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d
# a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d
# A B C => aAbA B CcA B Cd
# -X A B C => aAbAcAd aAbBcBd aAbCcCd
if($context_replace) {
while($tt =~ s/([^\s\257]* # before {=
(?:
\257< # {=
[^\257]*? # The perl expression
\257> # =}
[^\s\257]* # after =}
)+)/ /x) {
# $1 = pre \257 perlexpr \257 post
$word{"$1"} ||= 1;
}
} else {
while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) {
# $f = \257 perlexpr \257
$word{$1} ||= 1;
}
}
}
my @word = keys %word;
my %replace;
my @arg;
for my $record (@{$self->{'arg_list'}}) {
# $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
# Merge arg-objects from records into @arg for easy access
CORE::push @arg, @$record;
}
# Add one arg if empty to allow {#} and {%} to be computed only once
if(not @arg) { @arg = (Arg->new("")); }
# Number of arguments - used for positional arguments
my $n = $#_+1;
# This is actually a CommandLine-object,
# but it looks nice to be able to say {= $job->slot() =}
my $job = $self;
for my $word (@word) {
# word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF
my $w = $word;
::debug("replace", "Replacing in $w\n");
# Replace positional arguments
$w =~ s< ([^\s\257]*) # before {=
\257< # {=
(-?\d+) # Position (eg. -2 or 3)
([^\257]*?) # The perl expression
\257> # =}
([^\s\257]*) # after =}
>
{ $1. # Context (pre)
(
$arg[$2 > 0 ? $2-1 : $n+$2] ? # If defined: replace
$arg[$2 > 0 ? $2-1 : $n+$2]->replace($3,$quote_arg,$self)
: "")
.$4 }egx;# Context (post)
::debug("replace", "Positional replaced $word with: $w\n");
if($w !~ /\257/) {
# No more replacement strings in $w: No need to do more
if($quote) {
CORE::push(@{$replace{::shell_quote($word)}}, $w);
} else {
CORE::push(@{$replace{$word}}, $w);
}
next;
}
# for each arg:
# compute replacement for each string
# replace replacement strings with replacement in the word value
# push to replace word value
::debug("replace", "Positional done: $w\n");
for my $arg (@arg) {
my $val = $w;
my $number_of_replacements = 0;
for my $perlexpr (keys %{$self->{'replacecount'}}) {
# Replace {= perl expr =} with value for each arg
$number_of_replacements +=
$val =~ s{\257<\Q$perlexpr\E\257>}
{$arg ? $arg->replace($perlexpr,$quote_arg,$self) : ""}eg;
}
my $ww = $word;
if($quote) {
$ww = ::shell_quote_scalar($word);
$val = ::shell_quote_scalar($val);
}
if($number_of_replacements) {
CORE::push(@{$replace{$ww}}, $val);
}
}
}
if($quote) {
@target = ::shell_quote(@target);
}
# ::debug("replace", "%replace=",::my_dump(%replace),"\n");
if(%replace) {
# Substitute the replace strings with the replacement values
# Must be sorted by length if a short word is a substring of a long word
my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s }
sort { length $b <=> length $a } keys %replace);
for(@target) {
s/($regexp)/join(" ",@{$replace{$1}})/ge;
}
}
::debug("replace", "Return @target\n");
return wantarray ? @target : "@target";
}
package CommandLineQueue;
sub new {
my $class = shift;
my $commandref = shift;
my $read_from = shift;
my $context_replace = shift;
my $max_number_of_args = shift;
my $return_files = shift;
my @unget = ();
my ($count,%replacecount,$posrpl,$perlexpr,%len);
my @command = @$commandref;
# If the first command start with '-' it is probably an option
if($command[0] =~ /^\s*(-\S+)/) {
# Is this really a command in $PATH starting with '-'?
my $cmd = $1;
if(not ::which($cmd)) {
::error("Command ($cmd) starts with '-'. Is this a wrong option?\n");
::wait_and_exit(255);
}
}
# Replace replacement strings with {= perl expr =}
# Protect matching inside {= perl expr =}
# by replacing {= and =} with \257< and \257>
for(@command) {
if(/\257/) {
::error("Command cannot contain the character \257. Use a function for that.\n");
::wait_and_exit(255);
}
s/\Q$Global::parensleft\E(.*?)\Q$Global::parensright\E/\257<$1\257>/gx;
}
for my $rpl (keys %Global::rpl) {
# Replace the short hand string with the {= perl expr =} in $command and $opt::tagstring
# Avoid replacing inside existing {= perl expr =}
for(@command,@Global::ret_files) {
while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
\Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/xg) {
}
}
if(defined $opt::tagstring) {
for($opt::tagstring) {
while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
\Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/x) {}
}
}
# Do the same for the positional replacement strings
# A bit harder as we have to put in the position number
$posrpl = $rpl;
if($posrpl =~ s/^\{//) {
# Only do this if the shorthand start with {
for(@command,@Global::ret_files) {
s/\{(-?\d+)\Q$posrpl\E/\257<$1 $Global::rpl{$rpl}\257>/g;
}
if(defined $opt::tagstring) {
$opt::tagstring =~ s/\{(-?\d+)\Q$posrpl\E/\257<$1 $perlexpr\257>/g;
}
}
}
my $sum = 0;
while($sum == 0) {
# Count how many times each replacement string is used
my @cmd = @command;
my $contextlen = 0;
my $noncontextlen = 0;
my $contextgroups = 0;
for my $c (@cmd) {
while($c =~ s/ \257<([^\257]*?)\257> /\000/x) {
# %replacecount = { "perlexpr" => number of times seen }
# e.g { "$_++" => 2 }
$replacecount{$1} ++;
$sum++;
}
# Measure the length of the context around the {= perl expr =}
# Use that {=...=} has been replaced with \000 above
# So there is no need to deal with \257<
while($c =~ s/ (\S*\000\S*) //x) {
my $w = $1;
$w =~ tr/\000//d; # Remove all \000's
$contextlen += length($w);
$contextgroups++;
}
# All {= perl expr =} have been removed: The rest is non-context
$noncontextlen += length $c;
}
if($opt::tagstring) {
my $t = $opt::tagstring;
while($t =~ s/ \257<([^\257]*)\257> //x) {
# %replacecount = { "perlexpr" => number of times seen }
# e.g { "$_++" => 2 }
# But for tagstring we just need to mark it as seen
$replacecount{$1}||=1;
}
}
$len{'context'} = 0+$contextlen;
$len{'noncontext'} = $noncontextlen;
$len{'contextgroups'} = $contextgroups;
$len{'noncontextgroups'} = @cmd-$contextgroups;
::debug("length", "@command Context: ", $len{'context'},
" Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
" NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
if($sum == 0) {
# Default command = {}
# If not replacement string: append {}
if(not @command) {
@command = ("\257<\257>");
$Global::noquote = 1;
} elsif(($opt::pipe or $opt::pipepart)
and not $opt::fifo and not $opt::cat) {
# With --pipe / --pipe-part you can have no replacement
last;
} else {
# Append {} to the command if there are no {...}'s and no {=...=}
push @command, ("\257<\257>");
}
}
}
return bless {
'unget' => \@unget,
'command' => \@command,
'replacecount' => \%replacecount,
'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
'context_replace' => $context_replace,
'len' => \%len,
'max_number_of_args' => $max_number_of_args,
'size' => undef,
'return_files' => $return_files,
'seq' => 1,
}, ref($class) || $class;
}
sub get {
my $self = shift;
if(@{$self->{'unget'}}) {
my $cmd_line = shift @{$self->{'unget'}};
return ($cmd_line);
} else {
my $cmd_line;
$cmd_line = CommandLine->new($self->seq(),
$self->{'command'},
$self->{'arg_queue'},
$self->{'context_replace'},
$self->{'max_number_of_args'},
$self->{'return_files'},
$self->{'replacecount'},
$self->{'len'},
);
$cmd_line->populate();
::debug("init","cmd_line->number_of_args ",
$cmd_line->number_of_args(), "\n");
if($opt::pipe or $opt::pipepart) {
if($cmd_line->replaced() eq "") {
# Empty command - pipe requires a command
::error("--pipe must have a command to pipe into (e.g. 'cat').\n");
::wait_and_exit(255);
}
} else {
if($cmd_line->number_of_args() == 0) {
# We did not get more args - maybe at EOF string?
return undef;
} elsif($cmd_line->replaced() eq "") {
# Empty command - get the next instead
return $self->get();
}
}
$self->set_seq($self->seq()+1);
return $cmd_line;
}
}
sub unget {
my $self = shift;
unshift @{$self->{'unget'}}, @_;
}
sub empty {
my $self = shift;
my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty();
::debug("run", "CommandLineQueue->empty $empty");
return $empty;
}
sub seq {
my $self = shift;
return $self->{'seq'};
}
sub set_seq {
my $self = shift;
$self->{'seq'} = shift;
}
sub quote_args {
my $self = shift;
# If there is not command emulate |bash
return $self->{'command'};
}
sub size {
my $self = shift;
if(not $self->{'size'}) {
my @all_lines = ();
while(not $self->{'arg_queue'}->empty()) {
push @all_lines, CommandLine->new($self->{'command'},
$self->{'arg_queue'},
$self->{'context_replace'},
$self->{'max_number_of_args'});
}
$self->{'size'} = @all_lines;
$self->unget(@all_lines);
}
return $self->{'size'};
}
package Limits::Command;
# Maximal command line length (for -m and -X)
sub max_length {
# Find the max_length of a command line and cache it
# Returns:
# number of chars on the longest command line allowed
if(not $Limits::Command::line_max_len) {
# Disk cache of max command line length
my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname();
my $cached_limit;
if(-e $len_cache) {
open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
$cached_limit = <$fh>;
close $fh;
} else {
$cached_limit = real_max_length();
# If $HOME is write protected: Do not fail
mkdir($ENV{'HOME'} . "/.parallel");
mkdir($ENV{'HOME'} . "/.parallel/tmp");
open(my $fh, ">", $len_cache);
print $fh $cached_limit;
close $fh;
}
$Limits::Command::line_max_len = $cached_limit;
if($opt::max_chars) {
if($opt::max_chars <= $cached_limit) {
$Limits::Command::line_max_len = $opt::max_chars;
} else {
::warning("Value for -s option ",
"should be < $cached_limit.\n");
}
}
}
return $Limits::Command::line_max_len;
}
sub real_max_length {
# Find the max_length of a command line
# Returns:
# The maximal command line length
# Use an upper bound of 8 MB if the shell allows for for infinite long lengths
my $upper = 8_000_000;
my $len = 8;
do {
if($len > $upper) { return $len };
$len *= 16;
} while (is_acceptable_command_line_length($len));
# Then search for the actual max length between 0 and upper bound
return binary_find_max_length(int($len/16),$len);
}
sub binary_find_max_length {
# Given a lower and upper bound find the max_length of a command line
# Returns:
# number of chars on the longest command line allowed
my ($lower, $upper) = (@_);
if($lower == $upper or $lower == $upper-1) { return $lower; }
my $middle = int (($upper-$lower)/2 + $lower);
::debug("init", "Maxlen: $lower,$upper,$middle : ");
if (is_acceptable_command_line_length($middle)) {
return binary_find_max_length($middle,$upper);
} else {
return binary_find_max_length($lower,$middle);
}
}
sub is_acceptable_command_line_length {
# Test if a command line of this length can run
# Returns:
# 0 if the command line length is too long
# 1 otherwise
my $len = shift;
local *STDERR;
open (STDERR, ">", "/dev/null");
system "true "."x"x$len;
close STDERR;
::debug("init", "$len=$? ");
return not $?;
}
package RecordQueue;
sub new {
my $class = shift;
my $fhs = shift;
my $colsep = shift;
my @unget = ();
my $arg_sub_queue;
if($colsep) {
# Open one file with colsep
$arg_sub_queue = RecordColQueue->new($fhs);
} else {
# Open one or more files if multiple -a
$arg_sub_queue = MultifileQueue->new($fhs);
}
return bless {
'unget' => \@unget,
'arg_number' => 0,
'arg_sub_queue' => $arg_sub_queue,
}, ref($class) || $class;
}
sub get {
# Returns:
# reference to array of Arg-objects
my $self = shift;
if(@{$self->{'unget'}}) {
$self->{'arg_number'}++;
return shift @{$self->{'unget'}};
}
my $ret = $self->{'arg_sub_queue'}->get();
if(defined $Global::max_number_of_args
and $Global::max_number_of_args == 0) {
::debug("run", "Read 1 but return 0 args\n");
return [Arg->new("")];
} else {
return $ret;
}
}
sub unget {
my $self = shift;
::debug("run", "RecordQueue-unget '@_'\n");
$self->{'arg_number'} -= @_;
unshift @{$self->{'unget'}}, @_;
}
sub empty {
my $self = shift;
my $empty = not @{$self->{'unget'}};
$empty &&= $self->{'arg_sub_queue'}->empty();
::debug("run", "RecordQueue->empty $empty");
return $empty;
}
sub arg_number {
my $self = shift;
return $self->{'arg_number'};
}
package RecordColQueue;
sub new {
my $class = shift;
my $fhs = shift;
my @unget = ();
my $arg_sub_queue = MultifileQueue->new($fhs);
return bless {
'unget' => \@unget,
'arg_sub_queue' => $arg_sub_queue,
}, ref($class) || $class;
}
sub get {
# Returns:
# reference to array of Arg-objects
my $self = shift;
if(@{$self->{'unget'}}) {
return shift @{$self->{'unget'}};
}
my $unget_ref=$self->{'unget'};
if($self->{'arg_sub_queue'}->empty()) {
return undef;
}
my $in_record = $self->{'arg_sub_queue'}->get();
if(defined $in_record) {
my @out_record = ();
for my $arg (@$in_record) {
::debug("run", "RecordColQueue::arg $arg\n");
my $line = $arg->orig();
::debug("run", "line='$line'\n");
if($line ne "") {
for my $s (split /$opt::colsep/o, $line, -1) {
push @out_record, Arg->new($s);
}
} else {
push @out_record, Arg->new("");
}
}
return \@out_record;
} else {
return undef;
}
}
sub unget {
my $self = shift;
::debug("run", "RecordColQueue-unget '@_'\n");
unshift @{$self->{'unget'}}, @_;
}
sub empty {
my $self = shift;
my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty());
::debug("run", "RecordColQueue->empty $empty");
return $empty;
}
package MultifileQueue;
@Global::unget_argv=();
sub new {
my $class = shift;
my $fhs = shift;
for my $fh (@$fhs) {
if(-t $fh) {
::warning("Input is read from the terminal. ".
"Only experts do this on purpose. ".
"Press CTRL-D to exit.\n");
}
}
return bless {
'unget' => \@Global::unget_argv,
'fhs' => $fhs,
'arg_matrix' => undef,
}, ref($class) || $class;
}
sub get {
my $self = shift;
if($opt::xapply) {
return $self->xapply_get();
} else {
return $self->nest_get();
}
}
sub unget {
my $self = shift;
::debug("run", "MultifileQueue-unget '@_'\n");
unshift @{$self->{'unget'}}, @_;
}
sub empty {
my $self = shift;
my $empty = (not @Global::unget_argv
and not @{$self->{'unget'}});
for my $fh (@{$self->{'fhs'}}) {
$empty &&= eof($fh);
}
::debug("run", "MultifileQueue->empty $empty ");
return $empty;
}
sub xapply_get {
my $self = shift;
if(@{$self->{'unget'}}) {
return shift @{$self->{'unget'}};
}
my @record = ();
my $prepend = undef;
my $empty = 1;
for my $fh (@{$self->{'fhs'}}) {
my $arg = read_arg_from_fh($fh);
if(defined $arg) {
# Record $arg for recycling at end of file
push @{$self->{'arg_matrix'}{$fh}}, $arg;
push @record, $arg;
$empty = 0;
} else {
::debug("run", "EOA ");
# End of file: Recycle arguments
push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
# return last @{$args->{'args'}{$fh}};
push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
}
}
if($empty) {
return undef;
} else {
return \@record;
}
}
sub nest_get {
my $self = shift;
if(@{$self->{'unget'}}) {
return shift @{$self->{'unget'}};
}
my @record = ();
my $prepend = undef;
my $empty = 1;
my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
if(not $self->{'arg_matrix'}) {
# Initialize @arg_matrix with one arg from each file
# read one line from each file
my @first_arg_set;
my $all_empty = 1;
for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
if(defined $arg) {
$all_empty = 0;
}
$self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
}
if($all_empty) {
# All filehandles were at eof or eof-string
return undef;
}
return [@first_arg_set];
}
# Treat the case with one input source special. For multiple
# input sources we need to remember all previously read values to
# generate all combinations. But for one input source we can
# forget the value after first use.
if($no_of_inputsources == 1) {
my $arg = read_arg_from_fh($self->{'fhs'}[0]);
if(defined($arg)) {
return [$arg];
}
return undef;
}
for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
if(eof($self->{'fhs'}[$fhno])) {
next;
} else {
# read one
my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
defined($arg) || next; # If we just read an EOF string: Treat this as EOF
my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
$self->{'arg_matrix'}[$fhno][$len] = $arg;
# make all new combinations
my @combarg = ();
for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}];
}
$combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry
# map combinations
# [ 1, 3, 7 ], [ 2, 4, 1 ]
# =>
# [ m[0][1], m[1][3], m[3][7] ], [ m[0][2], m[1][4], m[2][1] ]
my @mapped;
for my $c (expand_combinations(@combarg)) {
my @a;
for my $n (0 .. $no_of_inputsources - 1 ) {
push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
}
push @mapped, \@a;
}
# append the mapped to the ungotten arguments
push @{$self->{'unget'}}, @mapped;
# get the first
return shift @{$self->{'unget'}};
}
}
# all are eof or at EOF string; return from the unget queue
return shift @{$self->{'unget'}};
}
sub read_arg_from_fh {
# Read one Arg from filehandle
# Returns:
# Arg-object with one read line
# undef if end of file
my $fh = shift;
my $prepend = undef;
my $arg;
do {{
# This makes 10% faster
if(not ($arg = <$fh>)) {
if(defined $prepend) {
return Arg->new($prepend);
} else {
return undef;
}
}
# ::debug("run", "read $arg\n");
# Remove delimiter
$arg =~ s:$/$::;
if($Global::end_of_file_string and
$arg eq $Global::end_of_file_string) {
# Ignore the rest of input file
close $fh;
::debug("run", "EOF-string ($arg) met\n");
if(defined $prepend) {
return Arg->new($prepend);
} else {
return undef;
}
}
if(defined $prepend) {
$arg = $prepend.$arg; # For line continuation
$prepend = undef; #undef;
}
if($Global::ignore_empty) {
if($arg =~ /^\s*$/) {
redo; # Try the next line
}
}
if($Global::max_lines) {
if($arg =~ /\s$/) {
# Trailing space => continued on next line
$prepend = $arg;
redo;
}
}
}} while (1 == 0); # Dummy loop {{}} for redo
if(defined $arg) {
return Arg->new($arg);
} else {
::die_bug("multiread arg undefined");
}
}
sub expand_combinations {
# Input:
# ([xmin,xmax], [ymin,ymax], ...)
# Returns: ([x,y,...],[x,y,...])
# where xmin <= x <= xmax and ymin <= y <= ymax
my $minmax_ref = shift;
my $xmin = $$minmax_ref[0];
my $xmax = $$minmax_ref[1];
my @p;
if(@_) {
# If there are more columns: Compute those recursively
my @rest = expand_combinations(@_);
for(my $x = $xmin; $x <= $xmax; $x++) {
push @p, map { [$x, @$_] } @rest;
}
} else {
for(my $x = $xmin; $x <= $xmax; $x++) {
push @p, [$x];
}
}
return @p;
}
package Arg;
sub new {
my $class = shift;
my $orig = shift;
my @hostgroups;
if($opt::hostgroups) {
if($orig =~ s:@(.+)::) {
# We found hostgroups on the arg
@hostgroups = split(/\+/, $1);
if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
::warning("No such hostgroup (@hostgroups)\n");
@hostgroups = (keys %Global::hostgroups);
}
} else {
@hostgroups = (keys %Global::hostgroups);
}
}
return bless {
'orig' => $orig,
'hostgroups' => \@hostgroups,
}, ref($class) || $class;
}
sub replace {
# Calculates the corresponding value for a given perl expression
# Returns:
# The calculated string (quoted if asked for)
my $self = shift;
my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
my $quote = (shift) ? 1 : 0; # should the string be quoted?
# This is actually a CommandLine-object,
# but it looks nice to be able to say {= $job->slot() =}
my $job = shift;
$perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace
if(not defined $self->{"rpl",0,$perlexpr}) {
local $_;
if($Global::trim eq "n") {
$_ = $self->{'orig'};
} else {
$_ = trim_of($self->{'orig'});
}
::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
if(not $Global::perleval{$perlexpr}) {
# Make an anonymous function of the $perlexpr
# And more importantly: Compile it only once
if($Global::perleval{$perlexpr} =
eval('sub { no strict; no warnings; my $job = shift; '.
$perlexpr.' }')) {
# All is good
} else {
# The eval failed. Maybe $perlexpr is invalid perl?
::error("Cannot use $perlexpr: $@\n");
::wait_and_exit(255);
}
}
# Execute the function
$Global::perleval{$perlexpr}->($job);
$self->{"rpl",0,$perlexpr} = $_;
}
if(not defined $self->{"rpl",$quote,$perlexpr}) {
$self->{"rpl",1,$perlexpr} =
::shell_quote_scalar($self->{"rpl",0,$perlexpr});
}
return $self->{"rpl",$quote,$perlexpr};
}
sub orig {
my $self = shift;
return $self->{'orig'};
}
sub trim_of {
# Removes white space as specifed by --trim:
# n = nothing
# l = start
# r = end
# lr|rl = both
# Returns:
# string with white space removed as needed
my @strings = map { defined $_ ? $_ : "" } (@_);
my $arg;
if($Global::trim eq "n") {
# skip
} elsif($Global::trim eq "l") {
for my $arg (@strings) { $arg =~ s/^\s+//; }
} elsif($Global::trim eq "r") {
for my $arg (@strings) { $arg =~ s/\s+$//; }
} elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
} else {
::error("--trim must be one of: r l rl lr.\n");
::wait_and_exit(255);
}
return wantarray ? @strings : "@strings";
}
package TimeoutQueue;
sub new {
my $class = shift;
my $delta_time = shift;
my ($pct);
if($delta_time =~ /(\d+(\.\d+)?)%/) {
# Timeout in percent
$pct = $1/100;
$delta_time = 1_000_000;
}
return bless {
'queue' => [],
'delta_time' => $delta_time,
'pct' => $pct,
'remedian_idx' => 0,
'remedian_arr' => [],
'remedian' => undef,
}, ref($class) || $class;
}
sub delta_time {
my $self = shift;
return $self->{'delta_time'};
}
sub set_delta_time {
my $self = shift;
$self->{'delta_time'} = shift;
}
sub remedian {
my $self = shift;
return $self->{'remedian'};
}
sub set_remedian {
# Set median of the last 999^3 (=997002999) values using Remedian
#
# Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
# robust averaging method for large data sets." Journal of the
# American Statistical Association 85.409 (1990): 97-104.
my $self = shift;
my $val = shift;
my $i = $self->{'remedian_idx'}++;
my $rref = $self->{'remedian_arr'};
$rref->[0][$i%999] = $val;
$rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
$rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
$self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
}
sub update_delta_time {
# Update delta_time based on runtime of finished job if timeout is
# a percentage
my $self = shift;
my $runtime = shift;
if($self->{'pct'}) {
$self->set_remedian($runtime);
$self->{'delta_time'} = $self->{'pct'} * $self->remedian();
::debug("run", "Timeout: $self->{'delta_time'}s ");
}
}
sub process_timeouts {
# Check if there was a timeout
my $self = shift;
# $self->{'queue'} is sorted by start time
while (@{$self->{'queue'}}) {
my $job = $self->{'queue'}[0];
if($job->endtime()) {
# Job already finished. No need to timeout the job
# This could be because of --keep-order
shift @{$self->{'queue'}};
} elsif($job->timedout($self->{'delta_time'})) {
# Need to shift off queue before kill
# because kill calls usleep that calls process_timeouts
shift @{$self->{'queue'}};
$job->kill();
} else {
# Because they are sorted by start time the rest are later
last;
}
}
}
sub insert {
my $self = shift;
my $in = shift;
push @{$self->{'queue'}}, $in;
}
package Semaphore;
# This package provides a counting semaphore
#
# If a process dies without releasing the semaphore the next process
# that needs that entry will clean up dead semaphores
#
# The semaphores are stored in ~/.parallel/semaphores/id-<name> Each
# file in ~/.parallel/semaphores/id-<name>/ is the process ID of the
# process holding the entry. If the process dies, the entry can be
# taken by another process.
sub new {
my $class = shift;
my $id = shift;
my $count = shift;
$id=~s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
$id="id-".$id; # To distinguish it from a process id
my $parallel_dir = $ENV{'HOME'}."/.parallel";
-d $parallel_dir or mkdir_or_die($parallel_dir);
my $parallel_locks = $parallel_dir."/semaphores";
-d $parallel_locks or mkdir_or_die($parallel_locks);
my $lockdir = "$parallel_locks/$id";
my $lockfile = $lockdir.".lock";
if($count < 1) { ::die_bug("semaphore-count: $count"); }
return bless {
'lockfile' => $lockfile,
'lockfh' => Symbol::gensym(),
'lockdir' => $lockdir,
'id' => $id,
'idfile' => $lockdir."/".$id,
'pid' => $$,
'pidfile' => $lockdir."/".$$.'@'.::hostname(),
'count' => $count + 1 # nlinks returns a link for the 'id-' as well
}, ref($class) || $class;
}
sub acquire {
my $self = shift;
my $sleep = 1; # 1 ms
my $start_time = time;
while(1) {
$self->atomic_link_if_count_less_than() and last;
::debug("sem", "Remove dead locks");
my $lockdir = $self->{'lockdir'};
for my $d (glob "$lockdir/*") {
::debug("sem", "Lock $d $lockdir\n");
$d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
my ($pid, $host) = ($1, $2);
if($host eq ::hostname()) {
if(not kill 0, $1) {
::debug("sem", "Dead: $d");
unlink $d;
} else {
::debug("sem", "Alive: $d");
}
}
}
# try again
$self->atomic_link_if_count_less_than() and last;
# Retry slower and slower up to 1 second
$sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
# Random to avoid every sleeping job waking up at the same time
::usleep(rand()*$sleep);
if(defined($opt::timeout) and
$start_time + $opt::timeout > time) {
# Acquire the lock anyway
if(not -e $self->{'idfile'}) {
open (my $fh, ">", $self->{'idfile'}) or
::die_bug("timeout_write_idfile: $self->{'idfile'}");
close $fh;
}
link $self->{'idfile'}, $self->{'pidfile'};
last;
}
}
::debug("sem", "acquired $self->{'pid'}\n");
}
sub release {
my $self = shift;
unlink $self->{'pidfile'};
if($self->nlinks() == 1) {
# This is the last link, so atomic cleanup
$self->lock();
if($self->nlinks() == 1) {
unlink $self->{'idfile'};
rmdir $self->{'lockdir'};
}
$self->unlock();
}
::debug("run", "released $self->{'pid'}\n");
}
sub _release {
my $self = shift;
unlink $self->{'pidfile'};
$self->lock();
my $nlinks = $self->nlinks();
::debug("sem", $nlinks, "<", $self->{'count'});
if($nlinks-- > 1) {
unlink $self->{'idfile'};
open (my $fh, ">", $self->{'idfile'}) or
::die_bug("write_idfile: $self->{'idfile'}");
print $fh "#"x$nlinks;
close $fh;
} else {
unlink $self->{'idfile'};
rmdir $self->{'lockdir'};
}
$self->unlock();
::debug("sem", "released $self->{'pid'}\n");
}
sub atomic_link_if_count_less_than {
# Link $file1 to $file2 if nlinks to $file1 < $count
my $self = shift;
my $retval = 0;
$self->lock();
::debug($self->nlinks(), "<", $self->{'count'});
if($self->nlinks() < $self->{'count'}) {
-d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'});
if(not -e $self->{'idfile'}) {
open (my $fh, ">", $self->{'idfile'}) or
::die_bug("write_idfile: $self->{'idfile'}");
close $fh;
}
$retval = link $self->{'idfile'}, $self->{'pidfile'};
}
$self->unlock();
::debug("run", "atomic $retval");
return $retval;
}
sub _atomic_link_if_count_less_than {
# Link $file1 to $file2 if nlinks to $file1 < $count
my $self = shift;
my $retval = 0;
$self->lock();
my $nlinks = $self->nlinks();
::debug("sem", $nlinks, "<", $self->{'count'});
if($nlinks++ < $self->{'count'}) {
-d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'});
if(not -e $self->{'idfile'}) {
open (my $fh, ">", $self->{'idfile'}) or
::die_bug("write_idfile: $self->{'idfile'}");
close $fh;
}
open (my $fh, ">", $self->{'idfile'}) or
::die_bug("write_idfile: $self->{'idfile'}");
print $fh "#"x$nlinks;
close $fh;
$retval = link $self->{'idfile'}, $self->{'pidfile'};
}
$self->unlock();
::debug("sem", "atomic $retval");
return $retval;
}
sub nlinks {
my $self = shift;
if(-e $self->{'idfile'}) {
::debug("sem", "nlinks", (stat(_))[3], "size", (stat(_))[7], "\n");
return (stat(_))[3];
} else {
return 0;
}
}
sub lock {
my $self = shift;
my $sleep = 100; # 100 ms
my $total_sleep = 0;
$Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
my $locked = 0;
while(not $locked) {
if(tell($self->{'lockfh'}) == -1) {
# File not open
open($self->{'lockfh'}, ">", $self->{'lockfile'})
or ::debug("run", "Cannot open $self->{'lockfile'}");
}
if($self->{'lockfh'}) {
# File is open
chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
# The file is locked: No need to retry
$locked = 1;
last;
} else {
if ($! =~ m/Function not implemented/) {
::warning("flock: $!");
::warning("Will wait for a random while\n");
::usleep(rand(5000));
# File cannot be locked: No need to retry
$locked = 2;
last;
}
}
}
# Locking failed in first round
# Sleep and try again
$sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
# Random to avoid every sleeping job waking up at the same time
::usleep(rand()*$sleep);
$total_sleep += $sleep;
if($opt::semaphoretimeout) {
if($total_sleep/1000 > $opt::semaphoretimeout) {
# Timeout: bail out
::warning("Semaphore timed out. Ignoring timeout.");
$locked = 3;
last;
}
} else {
if($total_sleep/1000 > 30) {
::warning("Semaphore stuck for 30 seconds. Consider using --semaphoretimeout.");
}
}
}
::debug("run", "locked $self->{'lockfile'}");
}
sub unlock {
my $self = shift;
unlink $self->{'lockfile'};
close $self->{'lockfh'};
::debug("run", "unlocked\n");
}
sub mkdir_or_die {
# If dir is not writable: die
my $dir = shift;
my @dir_parts = split(m:/:,$dir);
my ($ddir,$part);
while(defined ($part = shift @dir_parts)) {
$part eq "" and next;
$ddir .= "/".$part;
-d $ddir and next;
mkdir $ddir;
}
if(not -w $dir) {
::error("Cannot write to $dir: $!\n");
::wait_and_exit(255);
}
}
# Keep perl -w happy
$opt::x = $Semaphore::timeout = $Semaphore::wait =
$Job::file_descriptor_warning_printed = 0;