Current Path : /bin/
Current File : /bin/parallel
#!/usr/bin/env perl # Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 # Ole Tange and Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see <http://www.gnu.org/licenses/> # or write to the Free Software Foundation, Inc., 51 Franklin St, # Fifth Floor, Boston, MA 02110-1301 USA # open3 used in Job::start use IPC::Open3; # &WNOHANG used in reaper use POSIX qw(:sys_wait_h setsid ceil :errno_h); # gensym used in Job::start use Symbol qw(gensym); # tempfile used in Job::start use File::Temp qw(tempfile tempdir); # mkpath used in openresultsfile use File::Path; # GetOptions used in get_options_from_array use Getopt::Long; # Used to ensure code quality use strict; use File::Basename; save_stdin_stdout_stderr(); save_original_signal_handler(); parse_options(); ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n"); my $number_of_args; if($Global::max_number_of_args) { $number_of_args = $Global::max_number_of_args; } elsif ($opt::X or $opt::m or $opt::xargs) { $number_of_args = undef; } else { $number_of_args = 1; } my @command = @ARGV; my @input_source_fh; if($opt::pipepart) { # -a is used for data - not for command line args @input_source_fh = map { open_or_exit($_) } "/dev/null"; } else { @input_source_fh = map { open_or_exit($_) } @opt::a; if(not @input_source_fh and not $opt::pipe) { @input_source_fh = (*STDIN); } } if($opt::sql) { # Create SQL table to hold joblog + output $Global::sql->create_table($#input_source_fh+1); if($opt::sqlworker) { # Start a real --sqlworker in the background later $Global::sqlworker = 1; $opt::sqlworker = undef; } } if($opt::skip_first_line) { # Skip the first line for the first file handle my $fh = $input_source_fh[0]; <$fh>; } if($opt::header and not $opt::pipe) { # split with colsep or \t # $header force $colsep = \t if undef? my $delimiter = $opt::colsep; $delimiter ||= "\t"; my $id = 1; for my $fh (@input_source_fh) { my $line = <$fh>; chomp($line); ::debug("init", "Delimiter: '$delimiter'"); for my $s (split /$delimiter/o, $line) { ::debug("init", "Colname: '$s'"); # Replace {colname} with {2} for(@command,@Global::ret_files,@Global::transfer_files) { s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; } $Global::input_source_header{$id} = $s; $id++; } } } else { my $id = 1; for my $fh (@input_source_fh) { $Global::input_source_header{$id} = $id; $id++; } } if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { # Parallel check all hosts are up. 
Remove hosts that are down filter_hosts(); } if($opt::nonall or $opt::onall) { onall(\@input_source_fh,@command); wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); } # TODO --transfer foo/./bar --cleanup # multiple --transfer and --basefile with different /./ $Global::JobQueue = JobQueue->new( \@command,\@input_source_fh,$Global::ContextReplace, $number_of_args,\@Global::transfer_files,\@Global::ret_files); if($opt::pipepart) { if($opt::roundrobin) { # Compute size of -a my $size = 0; $size += -s $_ for @opt::a; # Compute $Global::max_jobs_running for my $sshlogin (values %Global::host) { $sshlogin->max_jobs_running(); } $Global::max_jobs_running or ::die_bug("Global::max_jobs_running not set"); # Set --blocksize = size / no of proc $opt::blocksize = 1 + $size / $Global::max_jobs_running; } @Global::cat_partials = map { pipe_part_files($_) } @opt::a; # Unget the empty arg as many times as there are parts $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget( map { [Arg->new("\0")] } @Global::cat_partials ); } if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) { # Count the number of jobs or shuffle all jobs # before starting any. # Must be done after ungetting any --pipepart jobs. $Global::JobQueue->total_jobs(); } # Compute $Global::max_jobs_running # Must be done after ungetting any --pipepart jobs. for my $sshlogin (values %Global::host) { $sshlogin->max_jobs_running(); } init_run_jobs(); my $sem; if($Global::semaphore) { $sem = acquire_semaphore(); } $SIG{TERM} = \&start_no_new_jobs; start_more_jobs(); if(not $opt::pipepart) { if($opt::pipe) { spreadstdin(); } } ::debug("init", "Start draining\n"); drain_job_queue(); ::debug("init", "Done draining\n"); reaper(); ::debug("init", "Done reaping\n"); if($opt::pipe and @opt::a) { for my $job (@Global::tee_jobs) { unlink $job->fh(2,"name"); $job->set_fh(2,"name",""); $job->print(); unlink $job->fh(1,"name"); } } ::debug("init", "Cleaning\n"); cleanup(); if($Global::semaphore) { $sem->release(); } for(keys %Global::sshmaster) { # If 'ssh -M's are running: kill them kill "TERM", $_; } ::debug("init", "Halt\n"); if($opt::halt and $Global::halt_when ne "never") { if(not defined $Global::halt_exitstatus) { if($Global::halt_pct) { $Global::halt_exitstatus = ::ceil($Global::total_failed / $Global::total_started * 100); } elsif($Global::halt_count) { $Global::halt_exitstatus = ::min(undef_as_zero($Global::total_failed),101); } } wait_and_exit($Global::halt_exitstatus); } else { wait_and_exit(min(undef_as_zero($Global::exitstatus),101)); } sub __PIPE_MODE__ {} sub pipe_part_files { # Input: # $file = the file to read # Returns: # @commands that will cat_partial each part my ($file) = @_; my $buf = ""; if(not -f $file) { ::error("$file is not a seekable file."); ::wait_and_exit(255); } my $header = find_header(\$buf,open_or_exit($file)); # find positions my @pos = find_split_positions($file,$opt::blocksize,length $header); # Make @cat_partials my @cat_partials = (); for(my $i=0; $i<$#pos; $i++) { push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]); } return @cat_partials; } sub find_header { # Input: # $buf_ref = reference to read-in buffer # $fh = filehandle to read from # Uses: # $opt::header # $opt::blocksize # Returns: # $header string my ($buf_ref, $fh) = @_; my $header = ""; if($opt::header) { if($opt::header eq ":") { $opt::header = "(.*\n)"; } # Number = number of lines $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) { 
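            # Note (descriptive, not in the original source): read() into a
            # zero-length lvalue substr at the end of $$buf_ref appends the
            # next --blocksize chunk to the buffer. The loop keeps reading
            # until the header regexp matches at the start of the buffer;
            # e.g. --header 2 is expanded above to "(.*\n)(.*\n)" and so
            # strips the first two lines into $header (illustrative values).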
if($$buf_ref=~s/^($opt::header)//) { $header = $1; last; } } } return $header; } sub find_split_positions { # Input: # $file = the file to read # $block = (minimal) --block-size of each chunk # $headerlen = length of header to be skipped # Uses: # $opt::recstart # $opt::recend # Returns: # @positions of block start/end my($file, $block, $headerlen) = @_; my $size = -s $file; $block = int $block; # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 # The optimal dd blocksize for freebsd = 2^15..2^17 my $dd_block_size = 131072; # 2^17 my @pos; my ($recstart,$recend) = recstartrecend(); my $recendrecstart = $recend.$recstart; my $fh = ::open_or_exit($file); push(@pos,$headerlen); for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) { my $buf; seek($fh, $pos, 0) || die; while(read($fh,substr($buf,length $buf,0),$dd_block_size)) { if($opt::regexp) { # If match /$recend$recstart/ => Record position if($buf =~ /^(.*$recend)$recstart/os) { # Start looking for next record _after_ this match $pos += length($1); push(@pos,$pos); last; } } else { # If match $recend$recstart => Record position my $i = index64(\$buf,$recendrecstart); if($i != -1) { # Start looking for next record _after_ this match $pos += $i + length($recendrecstart); push(@pos,$pos); last; } } } } push(@pos,$size); close $fh; return @pos; } sub cat_partial { # Input: # $file = the file to read # ($start, $end, [$start2, $end2, ...]) = start byte, end byte # Returns: # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout my($file, @start_end) = @_; my($start, $i); # Convert start_end to start_len my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end; return "<". shell_quote_scalar($file) . q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } . " @start_len"; } sub spreadstdin { # read a record # Spawn a job and print the record to it. # Uses: # $opt::blocksize # STDIN # $opt::r # $Global::max_lines # $Global::max_number_of_args # $opt::regexp # $Global::start_no_new_jobs # $opt::roundrobin # %Global::running # Returns: N/A my $buf = ""; my ($recstart,$recend) = recstartrecend(); my $recendrecstart = $recend.$recstart; my $chunk_number = 1; my $one_time_through; my $two_gb = 2**31-1; my $blocksize = $opt::blocksize; my $in = *STDIN; my $header = find_header(\$buf,$in); while(1) { my $anything_written = 0; my $buflen = length $buf; my $readsize = ($buflen < $blocksize) ? $blocksize-$buflen : $blocksize; # If $buf < $blocksize, append so it is $blocksize long after reading. 
# Otherwise append a full $blocksize if(not read($in,substr($buf,$buflen,0),$readsize)) { # End-of-file $chunk_number != 1 and last; # Force the while-loop once if everything was read by header reading $one_time_through++ and last; } if($opt::r) { # Remove empty lines $buf =~ s/^\s*\n//gm; if(length $buf == 0) { next; } } if($Global::max_lines and not $Global::max_number_of_args) { # Read n-line records my $n_lines = $buf =~ tr/\n/\n/; my $last_newline_pos = rindex64(\$buf,"\n"); while($n_lines % $Global::max_lines) { $n_lines--; $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); } # Chop at $last_newline_pos as that is where n-line record ends $anything_written += write_record_to_pipe($chunk_number++,\$header,\$buf, $recstart,$recend,$last_newline_pos+1); shorten(\$buf,$last_newline_pos+1); } elsif($opt::regexp) { if($Global::max_number_of_args) { # -N => (start..*?end){n} # -L -N => (start..*?end){n*l} my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) { # Copy to modifiable variable my $b = $1; $anything_written += write_record_to_pipe($chunk_number++,\$header,\$b, $recstart,$recend,length $1); } } else { eof($in) and last; # Find the last recend-recstart in $buf if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { # Copy to modifiable variable my $b = $1; $anything_written += write_record_to_pipe($chunk_number++,\$header,\$b, $recstart,$recend,length $1); } } } else { if($Global::max_number_of_args) { # -N => (start..*?end){n} my $i = 0; my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { $i += length $recend; # find the actual splitting location $anything_written += write_record_to_pipe($chunk_number++,\$header,\$buf, $recstart,$recend,$i); shorten(\$buf,$i); } } else { eof($in) and last; # Find the last recend+recstart in $buf my $i = rindex64(\$buf,$recendrecstart); if($i != -1) { $i += length $recend; # find the actual splitting location $anything_written += write_record_to_pipe($chunk_number++,\$header,\$buf, $recstart,$recend,$i); shorten(\$buf,$i); } } } if(not $anything_written and not eof($in)) { # Nothing was written - maybe the block size < record size? # Increase blocksize exponentially up to 2GB-1 (2GB causes problems) if($blocksize < $two_gb) { my $old_blocksize = $blocksize; $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb); ::warning("A record was longer than $old_blocksize. " . "Increasing to --blocksize $blocksize."); } } } ::debug("init", "Done reading input\n"); # If there is anything left in the buffer write it write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf); if($opt::retries) { $Global::no_more_input = 1; # We need to start no more jobs: At most we need to retry some # of the already running. my @running = values %Global::running; # Stop any virgins. 
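        # A "virgin" job has been forked but has not yet been fed a block on
        # its stdin (see @Global::virgin_jobs); closing its write end here
        # makes it see EOF immediately instead of waiting for more input.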
for my $job (@running) { if(defined $job and $job->virgin()) { close $job->fh(0,"w"); } } # Wait for running jobs to be done my $sleep =1; while($Global::total_running > 0) { $sleep = ::reap_usleep($sleep); } } $Global::start_no_new_jobs ||= 1; if($opt::roundrobin) { for my $job (values %Global::running) { close $job->fh(0,"w"); } my %incomplete_jobs = %Global::running; my $sleep = 1; while(keys %incomplete_jobs) { my $something_written = 0; for my $pid (keys %incomplete_jobs) { my $job = $incomplete_jobs{$pid}; if($job->block_length()) { $something_written += $job->non_blocking_write(); } else { delete $incomplete_jobs{$pid} } } if($something_written) { $sleep = $sleep/2+0.001; } $sleep = ::reap_usleep($sleep); } } } sub recstartrecend { # Uses: # $opt::recstart # $opt::recend # Returns: # $recstart,$recend with default values and regexp conversion my($recstart,$recend); if(defined($opt::recstart) and defined($opt::recend)) { # If both --recstart and --recend is given then both must match $recstart = $opt::recstart; $recend = $opt::recend; } elsif(defined($opt::recstart)) { # If --recstart is given it must match start of record $recstart = $opt::recstart; $recend = ""; } elsif(defined($opt::recend)) { # If --recend is given then it must match end of record $recstart = ""; $recend = $opt::recend; } if($opt::regexp) { # If $recstart/$recend contains '|' this should only apply to the regexp $recstart = "(?:".$recstart.")"; $recend = "(?:".$recend.")"; } else { # $recstart/$recend = printf strings (\n) $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; } return ($recstart,$recend); } sub nindex { # See if string is in buffer N times # Returns: # the position where the Nth copy is found my ($buf_ref, $str, $n) = @_; my $i = 0; for(1..$n) { $i = index64($buf_ref,$str,$i+1); if($i == -1) { last } } return $i; } { my @robin_queue; sub round_robin_write { # Input: # $header_ref = ref to $header string # $block_ref = ref to $block to be written # $recstart = record start string # $recend = record end string # $endpos = end position of $block # Uses: # %Global::running # Returns: # $something_written = amount of bytes written my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_; my $something_written = 0; my $block_passed = 0; my $sleep = 1; while(not $block_passed) { # Continue flushing existing buffers # until one is empty and a new block is passed # Make a queue to spread the blocks evenly if(not @robin_queue) { push @robin_queue, (sort { $a->seq() <=> $b->seq() } values %Global::running); } while(my $job = shift @robin_queue) { if($job->block_length() > 0) { $something_written += $job->non_blocking_write(); } else { $job->set_block($header_ref,$buffer_ref,$endpos,$recstart,$recend); $block_passed = 1; $job->set_virgin(0); $something_written += $job->non_blocking_write(); last; } } $sleep = ::reap_usleep($sleep); } return $something_written; } } sub index64 { # Do index on strings > 2GB. # index in Perl < v5.22 does not work for > 2GB # Input: # as index except STR which must be passed as a reference # Output: # as index my $ref = shift; my $match = shift; my $pos = shift || 0; my $block_size = 2**31-1; my $strlen = length($$ref); # No point in doing extra work if we don't need to. 
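    # When the fast path below does not apply (a >2GB string on perl < 5.22),
    # the loop scans windows of 2**31-1 bytes that overlap by $matchlen+1
    # bytes, so a match straddling a window boundary is still found
    # (descriptive sketch of the loop below, not original source).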
if($strlen < $block_size or $] > 5.022) { return index($$ref, $match, $pos); } my $matchlen = length($match); my $ret; my $offset = $pos; while($offset < $strlen) { $ret = index( substr($$ref, $offset, $block_size), $match, $pos-$offset); if($ret != -1) { return $ret + $offset; } $offset += ($block_size - $matchlen - 1); } return -1; } sub rindex64 { # Do rindex on strings > 2GB. # rindex in Perl < v5.22 does not work for > 2GB # Input: # as rindex except STR which must be passed as a reference # Output: # as rindex my $ref = shift; my $match = shift; my $pos = shift; my $block_size = 2**31-1; my $strlen = length($$ref); # Default: search from end $pos = defined $pos ? $pos : $strlen; # No point in doing extra work if we don't need to. if($strlen < $block_size) { return rindex($$ref, $match, $pos); } my $matchlen = length($match); my $ret; my $offset = $pos - $block_size + $matchlen; if($offset < 0) { # The offset is less than a $block_size # Set the $offset to 0 and # Adjust block_size accordingly $block_size = $block_size + $offset; $offset = 0; } while($offset >= 0) { $ret = rindex( substr($$ref, $offset, $block_size), $match); if($ret != -1) { return $ret + $offset; } $offset -= ($block_size - $matchlen - 1); } return -1; } sub shorten { # Do: substr($buf,0,$i) = ""; # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks # Input: # $buf_ref = \$buf # $i = position to shorten to # Returns: N/A my ($buf_ref, $i) = @_; my $two_gb = 2**31-1; while($i > $two_gb) { substr($$buf_ref,0,$two_gb) = ""; $i -= $two_gb; } substr($$buf_ref,0,$i) = ""; } sub write_record_to_pipe { # Fork then # Write record from pos 0 .. $endpos to pipe # Input: # $chunk_number = sequence number - to see if already run # $header_ref = reference to header string to prepend # $buffer_ref = reference to record to write # $recstart = start string of record # $recend = end string of record # $endpos = position in $buffer_ref where record ends # Uses: # $Global::job_already_run # $opt::roundrobin # @Global::virgin_jobs # Returns: # Number of chunks written (0 or 1) my ($chunk_number,$header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_; if($endpos == 0) { return 0; } if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } if($opt::roundrobin) { return round_robin_write($header_ref,$buffer_ref,$recstart,$recend,$endpos); } # If no virgin found, backoff my $sleep = 0.0001; # 0.01 ms - better performance on highend while(not @Global::virgin_jobs) { ::debug("pipe", "No virgin jobs"); $sleep = ::reap_usleep($sleep); # Jobs may not be started because of loadavg # or too little time between each ssh login # or retrying failed jobs. start_more_jobs(); } my $job = shift @Global::virgin_jobs; # Job is no longer virgin $job->set_virgin(0); if($opt::retries) { # Copy $buffer[0..$endpos] to $job->{'block'} # Remove rec_sep # Run $job->add_transfersize $job->set_block($header_ref,$buffer_ref,$endpos,$recstart,$recend); if(fork()) { # Skip } else { $job->write($job->block_ref()); close $job->fh(0,"w"); exit(0); } } else { # We ignore the removed rec_sep which is technically wrong. $job->add_transfersize($endpos + length $$header_ref); if(fork()) { # Skip } else { # Chop of at $endpos as we do not know how many rec_sep will # be removed. 
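            # In the child: truncate the buffer to this record, write the
            # header followed by the record to the job's stdin and exit;
            # the parent only closes its own copy of the write end.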
substr($$buffer_ref,$endpos,length $$buffer_ref) = ""; # Remove rec_sep if($opt::remove_rec_sep) { Job::remove_rec_sep($buffer_ref,$recstart,$recend); } $job->write($header_ref); $job->write($buffer_ref); close $job->fh(0,"w"); exit(0); } } close $job->fh(0,"w"); return 1; } sub __SEM_MODE__ {} sub acquire_semaphore { # Acquires semaphore. If needed: spawns to the background # Uses: # @Global::host # Returns: # The semaphore to be released when jobs is complete $Global::host{':'} = SSHLogin->new(":"); my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); $sem->acquire(); if($Semaphore::fg) { # skip } else { if(fork()) { exit(0); } else { # If run in the background, the PID will change $sem->pid_change(); } } return $sem; } sub __PARSE_OPTIONS__ {} sub options_hash { # Returns: # %hash = the GetOptions config return ("debug|D=s" => \$opt::D, "xargs" => \$opt::xargs, "m" => \$opt::m, "X" => \$opt::X, "v" => \@opt::v, "sql=s" => \$opt::sql, "sqlworker=s" => \$opt::sqlworker, "sqlandworker=s" => \$opt::sqlandworker, "joblog=s" => \$opt::joblog, "results|result|res=s" => \$opt::results, "resume" => \$opt::resume, "resume-failed|resumefailed" => \$opt::resume_failed, "retry-failed|retryfailed" => \$opt::retry_failed, "silent" => \$opt::silent, "keep-order|keeporder|k" => \$opt::keeporder, "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder, "group" => \$opt::group, "g" => \$opt::retired, "ungroup|u" => \$opt::ungroup, "linebuffer|linebuffered|line-buffer|line-buffered|lb" => \$opt::linebuffer, "tmux" => \$opt::tmux, "null|0" => \$opt::0, "quote|q" => \$opt::q, # Replacement strings "parens=s" => \$opt::parens, "rpl=s" => \@opt::rpl, "plus" => \$opt::plus, "I=s" => \$opt::I, "extensionreplace|er=s" => \$opt::U, "U=s" => \$opt::retired, "basenamereplace|bnr=s" => \$opt::basenamereplace, "dirnamereplace|dnr=s" => \$opt::dirnamereplace, "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace, "seqreplace=s" => \$opt::seqreplace, "slotreplace=s" => \$opt::slotreplace, "jobs|j=s" => \$opt::jobs, "delay=f" => \$opt::delay, "sshdelay=f" => \$opt::sshdelay, "load=s" => \$opt::load, "noswap" => \$opt::noswap, "max-line-length-allowed" => \$opt::max_line_length_allowed, "number-of-cpus" => \$opt::number_of_cpus, "number-of-cores" => \$opt::number_of_cores, "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores, "shellquote|shell_quote|shell-quote" => \$opt::shellquote, "nice=i" => \$opt::nice, "tag" => \$opt::tag, "tagstring|tag-string=s" => \$opt::tagstring, "onall" => \$opt::onall, "nonall" => \$opt::nonall, "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts, "sshlogin|S=s" => \@opt::sshlogin, "sshloginfile|slf=s" => \@opt::sshloginfile, "controlmaster|M" => \$opt::controlmaster, "ssh=s" => \$opt::ssh, "transfer-file|transferfile|transfer-files|transferfiles|tf=s" => \@opt::transfer_files, "return=s" => \@opt::return, "trc=s" => \@opt::trc, "transfer" => \$opt::transfer, "cleanup" => \$opt::cleanup, "basefile|bf=s" => \@opt::basefile, "B=s" => \$opt::retired, "ctrlc|ctrl-c" => \$opt::retired, "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired, "workdir|work-dir|wd=s" => \$opt::workdir, "W=s" => \$opt::retired, "tmpdir=s" => \$opt::tmpdir, "tempdir=s" => \$opt::tmpdir, "use-compress-program|compress-program=s" => \$opt::compress_program, "use-decompress-program|decompress-program=s" => \$opt::decompress_program, "compress" => \$opt::compress, "tty" => \$opt::tty, "T" => \$opt::retired, "H=i" => \$opt::retired, "dry-run|dryrun" => 
\$opt::dryrun, "progress" => \$opt::progress, "eta" => \$opt::eta, "bar" => \$opt::bar, "shuf" => \$opt::shuf, "arg-sep|argsep=s" => \$opt::arg_sep, "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep, "trim=s" => \$opt::trim, "env=s" => \@opt::env, "recordenv|record-env" => \$opt::record_env, "plain" => \$opt::plain, "profile|J=s" => \@opt::profile, "pipe|spreadstdin" => \$opt::pipe, "robin|round-robin|roundrobin" => \$opt::roundrobin, "recstart=s" => \$opt::recstart, "recend=s" => \$opt::recend, "regexp|regex" => \$opt::regexp, "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep, "files|output-as-files|outputasfiles" => \$opt::files, "block|block-size|blocksize=s" => \$opt::blocksize, "tollef" => \$opt::tollef, "gnu" => \$opt::gnu, "xapply" => \$opt::xapply, "bibtex|citation" => \$opt::bibtex, "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite, # Termination and retries "halt-on-error|halt=s" => \$opt::halt, "memfree=s" => \$opt::memfree, "retries=i" => \$opt::retries, "timeout=s" => \$opt::timeout, "termseq|term-seq=s" => \$opt::termseq, # xargs-compatibility - implemented, man, testsuite "max-procs|P=s" => \$opt::jobs, "delimiter|d=s" => \$opt::d, "max-chars|s=i" => \$opt::max_chars, "arg-file|a=s" => \@opt::a, "no-run-if-empty|r" => \$opt::r, "replace|i:s" => \$opt::i, "E=s" => \$opt::eof, "eof|e:s" => \$opt::eof, "max-args|n=i" => \$opt::max_args, "max-replace-args|N=i" => \$opt::max_replace_args, "colsep|col-sep|C=s" => \$opt::colsep, "help|h" => \$opt::help, "L=f" => \$opt::L, "max-lines|l:f" => \$opt::max_lines, "interactive|p" => \$opt::interactive, "verbose|t" => \$opt::verbose, "version|V" => \$opt::version, "minversion|min-version=i" => \$opt::minversion, "show-limits|showlimits" => \$opt::show_limits, "exit|x" => \$opt::x, # Semaphore "semaphore" => \$opt::semaphore, "semaphoretimeout|st=i" => \$opt::semaphoretimeout, "semaphorename|id=s" => \$opt::semaphorename, "fg" => \$opt::fg, "bg" => \$opt::bg, "wait" => \$opt::wait, # Shebang #!/usr/bin/parallel --shebang "shebang|hashbang" => \$opt::shebang, "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles, "Y" => \$opt::retired, "skip-first-line" => \$opt::skip_first_line, "header=s" => \$opt::header, "cat" => \$opt::cat, "fifo" => \$opt::fifo, "pipepart|pipe-part" => \$opt::pipepart, "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups, ); } sub get_options_from_array { # Run GetOptions on @array # Input: # $array_ref = ref to @ARGV to parse # @keep_only = Keep only these options # Uses: # @ARGV # Returns: # true if parsing worked # false if parsing failed # @$array_ref is changed my ($array_ref, @keep_only) = @_; if(not @$array_ref) { # Empty array: No need to look more at that return 1; } # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not # supported everywhere my @save_argv; my $this_is_ARGV = (\@::ARGV == $array_ref); if(not $this_is_ARGV) { @save_argv = @::ARGV; @::ARGV = @{$array_ref}; } # If @keep_only set: Ignore all values except @keep_only my %options = options_hash(); if(@keep_only) { my (%keep,@dummy); @keep{@keep_only} = @keep_only; for my $k (grep { not $keep{$_} } keys %options) { # Store the value of the option in @dummy $options{$k} = \@dummy; } } my $retval = GetOptions(%options); if(not $this_is_ARGV) { @{$array_ref} = @::ARGV; @::ARGV = @save_argv; } return $retval; } sub parse_options { # Returns: N/A init_globals(); @ARGV = read_options(); # no-* overrides * if($opt::nokeeporder) { $opt::keeporder = undef; } if(@opt::v) { $Global::verbose = 
$#opt::v+1; } # Convert -v -v to v=2 $Global::debug = $opt::D; $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh"; $Global::cshell = $Global::shell =~ m:/csh:; if(defined $opt::X) { $Global::ContextReplace = 1; } if(defined $opt::silent) { $Global::verbose = 0; } if(defined $opt::0) { $/ = "\0"; } if(defined $opt::d) { $/ = unquote_printf($opt::d) } if(defined $opt::tagstring) { $opt::tagstring = unquote_printf($opt::tagstring); } if(defined $opt::interactive) { $Global::interactive = $opt::interactive; } if(defined $opt::q) { $Global::quoting = 1; } if(defined $opt::r) { $Global::ignore_empty = 1; } if(defined $opt::verbose) { $Global::stderr_verbose = 1; } parse_replacement_string_options(); if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; } if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); } if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } $opt::nice ||= 0; if(defined $opt::help) { die_usage(); } if(defined $opt::sqlandworker) { $opt::sql = $opt::sqlworker = $opt::sqlandworker; } if(defined $opt::colsep) { $Global::trim = 'lr'; } if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; } if(defined $opt::trim) { $Global::trim = $opt::trim; } if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; } if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); } if(defined $opt::number_of_cores) { print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); } if(defined $opt::max_line_length_allowed) { print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); } if(defined $opt::version) { version(); wait_and_exit(0); } if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); } if(defined $opt::record_env) { record_env(); wait_and_exit(0); } if(defined $opt::show_limits) { show_limits(); } if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } if(@opt::return) { push @Global::ret_files, @opt::return; } if($opt::transfer) { push @Global::transfer_files, $opt::i || $opt::I || "{}"; } if(@opt::transfer_files) { push @Global::transfer_files, @opt::transfer_files; } if(not defined $opt::recstart and not defined $opt::recend) { $opt::recend = "\n"; } if(not defined $opt::blocksize) { $opt::blocksize = "1M"; } $opt::blocksize = multiply_binary_prefix($opt::blocksize); if($opt::blocksize > 2**31-1) { warning("--blocksize >= 2G causes problems. Using 2G-1."); $opt::blocksize = 2**31-1; } $opt::memfree = multiply_binary_prefix($opt::memfree); check_invalid_option_combinations(); if((defined $opt::fifo or defined $opt::cat) and not $opt::pipepart) { $opt::pipe = 1; } if(defined $opt::minversion) { print $Global::version,"\n"; if($Global::version < $opt::minversion) { wait_and_exit(255); } else { wait_and_exit(0); } } if(not defined $opt::delay) { # Set --delay to --sshdelay if not set $opt::delay = $opt::sshdelay; } if($opt::compress_program) { $opt::compress = 1; $opt::decompress_program ||= $opt::compress_program." 
-dc"; } if($opt::compress) { my ($compress, $decompress) = find_compression_program(); $opt::compress_program ||= $compress; $opt::decompress_program ||= $decompress; } if(defined $opt::nonall and not grep /\Q$Global::arg_sep\E|\Q$Global::arg_sep\E/, @ARGV) { # Append a dummy empty argument if there are no arguments # on the command line to avoid reading from STDIN. # \0 => nothing (not the empty string) push @ARGV, $Global::arg_sep, "\0"; } if(defined $opt::tty) { # Defaults for --tty: -j1 -u # Can be overridden with -jXXX -g if(not defined $opt::jobs) { $opt::jobs = 1; } if(not defined $opt::group) { $opt::ungroup = 1; } } if(@opt::trc) { push @Global::ret_files, @opt::trc; if(not @Global::transfer_files) { # Defaults to --transferfile {} push @Global::transfer_files, $opt::i || $opt::I || "{}"; } $opt::cleanup = 1; } if(defined $opt::max_lines) { if($opt::max_lines eq "-0") { # -l -0 (swallowed -0) $opt::max_lines = 1; $opt::0 = 1; $/ = "\0"; } elsif ($opt::max_lines == 0) { # If not given (or if 0 is given) => 1 $opt::max_lines = 1; } $Global::max_lines = $opt::max_lines; if(not $opt::pipe) { # --pipe -L means length of record - not max_number_of_args $Global::max_number_of_args ||= $Global::max_lines; } } # Read more than one arg at a time (-L, -N) if(defined $opt::L) { $Global::max_lines = $opt::L; if(not $opt::pipe) { # --pipe -L means length of record - not max_number_of_args $Global::max_number_of_args ||= $Global::max_lines; } } if(defined $opt::max_replace_args) { $Global::max_number_of_args = $opt::max_replace_args; $Global::ContextReplace = 1; } if((defined $opt::L or defined $opt::max_replace_args) and not ($opt::xargs or $opt::m)) { $Global::ContextReplace = 1; } if(defined $opt::tag and not defined $opt::tagstring) { # Default = {} $opt::tagstring = $Global::parensleft.$Global::parensright; } if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) { # Deal with ::: and :::: @ARGV=read_args_from_command_line(); } parse_semaphore(); if(defined $opt::eta) { $opt::progress = $opt::eta; } if(defined $opt::bar) { $opt::progress = $opt::bar; } citation_notice(); parse_halt(); parse_sshlogin(); parse_env_var(); if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { # As we do not know the max line length on the remote machine # long commands generated by xargs may fail # If $opt::max_replace_args is set, it is probably safe ::warning("Using -X or -m with --sshlogin may fail."); } if(not defined $opt::jobs) { $opt::jobs = "100%"; } open_joblog(); ($opt::sql or $opt::sqlworker) and $Global::sql = SQL->new($opt::sql || $opt::sqlworker); } sub check_invalid_option_combinations { if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) { ::error("--timeout must be seconds or percentage."); wait_and_exit(255); } if(defined $opt::fifo and defined $opt::cat) { ::error("--fifo cannot be combined with --cat."); ::wait_and_exit(255); } if(defined $opt::retries and defined $opt::roundrobin) { ::error("--retries cannot be combined with --roundrobin."); ::wait_and_exit(255); } if(defined $opt::pipepart and (defined $opt::L or defined $opt::max_lines or defined $opt::max_replace_args)) { ::error("--pipepart is incompatible with --max-replace-args, ". 
"--max-lines, and -L."); wait_and_exit(255); } if(defined $opt::group and $opt::ungroup) { ::error("--group cannot be combined with --ungroup."); ::wait_and_exit(255); } if(defined $opt::group and $opt::linebuffer) { ::error("--group cannot be combined with --line-buffer."); ::wait_and_exit(255); } if(defined $opt::ungroup and $opt::linebuffer) { ::error("--ungroup cannot be combined with --line-buffer."); ::wait_and_exit(255); } if(defined $opt::tollef and not $opt::gnu) { ::error("--tollef has been retired.","Remove --tollef or use --gnu to override --tollef."); ::wait_and_exit(255); } if(defined $opt::retired) { ::error("-g has been retired. Use --group.", "-B has been retired. Use --bf.", "-T has been retired. Use --tty.", "-U has been retired. Use --er.", "-W has been retired. Use --wd.", "-Y has been retired. Use --shebang.", "-H has been retired. Use --halt.", "--ctrlc has been retired.", "--noctrlc has been retired."); ::wait_and_exit(255); } } sub init_globals { # Defaults: $Global::version = 20160222; $Global::progname = 'parallel'; $Global::infinity = 2**31; $Global::debug = 0; $Global::verbose = 0; $Global::quoting = 0; $Global::total_completed = 0; # Read only table with default --rpl values %Global::replace = ( '{}' => '', '{#}' => '1 $_=$job->seq()', '{%}' => '1 $_=$job->slot()', '{/}' => 's:.*/::', '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);', '{/.}' => 's:.*/::; s:\.[^/.]+$::;', '{.}' => 's:\.[^/.]+$::', ); %Global::plus = ( # {} = {+/}/{/} # = {.}.{+.} = {+/}/{/.}.{+.} # = {..}.{+..} = {+/}/{/..}.{+..} # = {...}.{+...} = {+/}/{/...}.{+...} '{+/}' => 's:/[^/]*$::', '{+.}' => 's:.*\.::', '{+..}' => 's:.*\.([^.]*\.):$1:', '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:', '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::', '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::', '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', # {##} = number of jobs '{##}' => '$_=total_jobs()', ); # Modifiable copy of %Global::replace %Global::rpl = %Global::replace; $/ = "\n"; $Global::ignore_empty = 0; $Global::interactive = 0; $Global::stderr_verbose = 0; $Global::default_simultaneous_sshlogins = 9; $Global::exitstatus = 0; $Global::arg_sep = ":::"; $Global::arg_file_sep = "::::"; $Global::trim = 'n'; $Global::max_jobs_running = 0; $Global::job_already_run = ''; $ENV{'TMPDIR'} ||= "/tmp"; if(not $ENV{HOME}) { # $ENV{HOME} is sometimes not set if called from PHP ::warning("\$HOME not set. 
Using /tmp."); $ENV{HOME} = "/tmp"; } } sub parse_halt { # $opt::halt flavours # Uses: # $opt::halt # $Global::halt_when # $Global::halt_fail # $Global::halt_success # $Global::halt_pct # $Global::halt_count if(defined $opt::halt) { my %halt_expansion = ( "0" => "never", "1" => "soon,fail=1", "2" => "now,fail=1", "-1" => "soon,success=1", "-2" => "now,success=1", ); # Expand -2,-1,0,1,2 into long form $opt::halt = $halt_expansion{$opt::halt} || $opt::halt; # --halt 5% == --halt soon,fail=5% $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/; # Split: soon,fail=5% my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt; if(not grep { $when eq $_ } qw(never soon now)) { ::error("--halt must have 'never', 'soon', or 'now'."); ::wait_and_exit(255); } $Global::halt_when = $when; if($when ne "never") { if($fail_success eq "fail") { $Global::halt_fail = 1; } elsif($fail_success eq "success") { $Global::halt_success = 1; } else { ::error("--halt $when must be followed by ,success or ,fail."); ::wait_and_exit(255); } if($pct_count =~ /^(\d+)%$/) { $Global::halt_pct = $1/100; } elsif($pct_count =~ /^(\d+)$/) { $Global::halt_count = $1; } else { ::error("--halt $when,$fail_success ". "must be followed by ,number or ,percent%."); ::wait_and_exit(255); } } } } sub parse_replacement_string_options { # Deal with --rpl # Uses: # %Global::rpl # $Global::parensleft # $Global::parensright # $opt::parens # $Global::parensleft # $Global::parensright # $opt::plus # %Global::plus # $opt::I # $opt::U # $opt::i # $opt::basenamereplace # $opt::dirnamereplace # $opt::seqreplace # $opt::slotreplace # $opt::basenameextensionreplace sub rpl { # Modify %Global::rpl # Replace $old with $new my ($old,$new) = @_; if($old ne $new) { $Global::rpl{$new} = $Global::rpl{$old}; delete $Global::rpl{$old}; } } my $parens = "{==}"; if(defined $opt::parens) { $parens = $opt::parens; } my $parenslen = 0.5*length $parens; $Global::parensleft = substr($parens,0,$parenslen); $Global::parensright = substr($parens,$parenslen); if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } if(defined $opt::I) { rpl('{}',$opt::I); } if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } if(defined $opt::U) { rpl('{.}',$opt::U); } if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } if(defined $opt::basenameextensionreplace) { rpl('{/.}',$opt::basenameextensionreplace); } for(@opt::rpl) { # Create $Global::rpl entries for --rpl options # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" my ($shorthand,$long) = split/ /,$_,2; $Global::rpl{$shorthand} = $long; } } sub parse_semaphore { # Semaphore defaults # Must be done before computing number of processes and max_line_length # because when running as a semaphore GNU Parallel does not read args # Uses: # $opt::semaphore # $Global::semaphore # $opt::semaphoretimeout # $Semaphore::timeout # $opt::semaphorename # $Semaphore::name # $opt::fg # $Semaphore::fg # $opt::wait # $Semaphore::wait # $opt::bg # @opt::a # @Global::unget_argv # $Global::default_simultaneous_sshlogins # $opt::jobs # $Global::interactive $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' if(defined $opt::semaphore) { $Global::semaphore = 1; } if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } if(defined $opt::semaphorename) { $Global::semaphore = 1; } if(defined $opt::fg) { $Global::semaphore = 1; 
} if(defined $opt::bg) { $Global::semaphore = 1; } if(defined $opt::wait) { $Global::semaphore = 1; @ARGV = "true"; } if($Global::semaphore) { if(@opt::a) { # A semaphore does not take input from neither stdin nor file ::error("A semaphore does not take input from neither stdin nor a file\n"); ::wait_and_exit(255); } @opt::a = ("/dev/null"); # Append a dummy empty argument # \0 => nothing (not the empty string) push(@Global::unget_argv, [Arg->new("\0")]); $Semaphore::timeout = $opt::semaphoretimeout || 0; if(defined $opt::semaphorename) { $Semaphore::name = $opt::semaphorename; } else { $Semaphore::name = `tty`; chomp $Semaphore::name; } $Semaphore::fg = $opt::fg; $Semaphore::wait = $opt::wait; $Global::default_simultaneous_sshlogins = 1; if(not defined $opt::jobs) { $opt::jobs = 1; } if($Global::interactive and $opt::bg) { ::error("Jobs running in the ". "background cannot be interactive."); ::wait_and_exit(255); } } } sub record_env { # Record current %ENV-keys in ~/.parallel/ignored_vars # Returns: N/A my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars"; if(open(my $vars_fh, ">", $ignore_filename)) { print $vars_fh map { $_,"\n" } keys %ENV; } else { ::error("Cannot write to $ignore_filename."); ::wait_and_exit(255); } } sub parse_env_var { # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen # # Bash functions must be parsed to export them remotely # Pre-shellshock style bash function: # myfunc=() {... # Post-shellshock style bash function (v1): # BASH_FUNC_myfunc()=() {... # Post-shellshock style bash function (v2): # BASH_FUNC_myfunc%%=() {... # # Uses: # $Global::envvar = eval string that will set variables in both bash and csh # $Global::envwarn = If functions are used: Give warning in csh # $Global::envvarlen = length of $Global::envvar # @opt::env # $Global::shell # %ENV # Returns: N/A $Global::envvar = ""; $Global::envvarlen = length $Global::envvar; } sub open_joblog { # Open joblog as specified by --joblog # Uses: # $opt::resume # $opt::resume_failed # $opt::joblog # $opt::results # $Global::job_already_run # %Global::fd my $append = 0; if(($opt::resume or $opt::resume_failed) and not ($opt::joblog or $opt::results)) { ::error("--resume and --resume-failed require --joblog or --results."); ::wait_and_exit(255); } if($opt::joblog) { if($opt::resume || $opt::resume_failed || $opt::retry_failed) { if(open(my $joblog_fh, "<", $opt::joblog)) { # Read the joblog $append = <$joblog_fh>; # If there is a header: Open as append later my $joblog_regexp; if($opt::retry_failed) { # Make a regexp that only matches commands with exit+signal=0 # 4 host 1360490623.067 3.445 1023 1222 0 0 command $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; my @group; while(<$joblog_fh>) { if(/$joblog_regexp/o) { # This is 30% faster than set_job_already_run($1); vec($Global::job_already_run,($1||0),1) = 1; $Global::total_completed++; $group[$1-1] = "true"; } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) { $group[$1-1] = $3 } else { chomp; ::error("Format of '$opt::joblog' is wrong: $_"); ::wait_and_exit(255); } } if(@group) { my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); unlink($name); # Put args into argfile print $outfh map { $_,$/ } @group; seek $outfh, 0, 0; exit_if_disk_full(); # Set filehandle to -a @opt::a = ($outfh); } # Remove $command (so -a is run) @ARGV = (); } if($opt::resume || $opt::resume_failed) { if($opt::resume_failed) { # Make a regexp that only matches commands with exit+signal=0 # 4 host 1360490623.067 3.445 1023 1222 0 0 command 
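                # Joblog columns are tab separated:
                #   Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
                # so '^(\d+)(?:\t[^\t]+){5}\t0\t0\t' captures Seq, skips the
                # next five fields, and requires Exitval=0 and Signal=0.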
$joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; } else { # Just match the job number $joblog_regexp='^(\d+)'; } while(<$joblog_fh>) { if(/$joblog_regexp/o) { # This is 30% faster than set_job_already_run($1); vec($Global::job_already_run,($1||0),1) = 1; $Global::total_completed++; } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) { ::error("Format of '$opt::joblog' is wrong: $_"); ::wait_and_exit(255); } } } close $joblog_fh; } } if($append) { # Append to joblog if(not open($Global::joblog, ">>", $opt::joblog)) { ::error("Cannot append to --joblog $opt::joblog."); ::wait_and_exit(255); } } else { if($opt::dryrun) { # Do not write to joblog in a dry-run if(not open($Global::joblog, ">", "/dev/null")) { ::error("Cannot write to --joblog $opt::joblog."); ::wait_and_exit(255); } } elsif($opt::joblog eq "-") { # Use STDOUT as joblog $Global::joblog = $Global::fd{1}; } elsif(not open($Global::joblog, ">", $opt::joblog)) { # Overwrite the joblog ::error("Cannot write to --joblog $opt::joblog."); ::wait_and_exit(255); } print $Global::joblog join("\t", "Seq", "Host", "Starttime", "JobRuntime", "Send", "Receive", "Exitval", "Signal", "Command" ). "\n"; } } } sub find_compression_program { # Find a fast compression program # Returns: # $compress_program = compress program with options # $decompress_program = decompress program with options # Search for these. Sorted by speed on 16 core # parallel -j1 --joblog jl --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: gz '>'/dev/null , 1 2 3 , {1..3} , lz4 lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2 # sort -nk4 jl my @prg = qw(lz4 pigz lzop plzip pbzip2 pxz gzip lzma xz bzip2 lzip); for my $p (@prg) { if(which($p)) { return ("$p -c -1","$p -dc"); } } # Fall back to cat return ("cat","cat"); } sub read_options { # Read options from command line, profile and $PARALLEL # Uses: # $opt::shebang_wrap # $opt::shebang # @ARGV # $opt::plain # @opt::profile # $ENV{'HOME'} # $ENV{'PARALLEL'} # Returns: # @ARGV_no_opt = @ARGV without --options # This must be done first as this may exec myself if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or $ARGV[0] =~ /^--shebang-?wrap/ or $ARGV[0] =~ /^--hashbang/)) { # Program is called from #! line in script # remove --shebang-wrap if it is set $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); # remove --shebang if it is set $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); # remove --hashbang if it is set $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); if($opt::shebang) { my $argfile = shell_quote_scalar(pop @ARGV); # exec myself to split $ARGV[0] into separate fields exec "$0 --skip-first-line -a $argfile @ARGV"; } if($opt::shebang_wrap) { my @options; my @parser; if ($^O eq 'freebsd') { # FreeBSD's #! puts different values in @ARGV than Linux' does. 
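                # Descriptive sketch of the block below: a copy of @ARGV is
                # parsed first; the words GetOptions consumes are GNU
                # Parallel's own options and are moved to @options, any
                # remaining words before an optional ':::' go to @parser,
                # and what is left after the wrapped script's name becomes
                # the script's arguments on the re-exec.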
my @nooptions = @ARGV; get_options_from_array(\@nooptions); while($#ARGV > $#nooptions) { push @options, shift @ARGV; } while(@ARGV and $ARGV[0] ne ":::") { push @parser, shift @ARGV; } if(@ARGV and $ARGV[0] eq ":::") { shift @ARGV; } } else { @options = shift @ARGV; } my $script = shell_quote_scalar(shift @ARGV); # exec myself to split $ARGV[0] into separate fields exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV"; } } Getopt::Long::Configure("bundling","require_order"); my @ARGV_copy = @ARGV; my @ARGV_orig = @ARGV; # Check if there is a --profile to set @opt::profile get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); my @ARGV_profile = (); my @ARGV_env = (); if(not $opt::plain) { # Add options from .parallel/config and other profiles my @config_profiles = ( "/etc/parallel/config", $ENV{'HOME'}."/.parallel/config", $ENV{'HOME'}."/.parallelrc"); my @profiles = @config_profiles; if(@opt::profile) { # --profile overrides default profiles @profiles = (); for my $profile (@opt::profile) { if(-r $profile) { push @profiles, $profile; } else { push @profiles, $ENV{'HOME'}."/.parallel/".$profile; } } } for my $profile (@profiles) { if(-r $profile) { open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile"); while(<$in_fh>) { /^\s*\#/ and next; chomp; push @ARGV_profile, shellwords($_); } close $in_fh; } else { if(grep /^$profile$/, @config_profiles) { # config file is not required to exist } else { ::error("$profile not readable."); wait_and_exit(255); } } } # Add options from shell variable $PARALLEL if($ENV{'PARALLEL'}) { @ARGV_env = shellwords($ENV{'PARALLEL'}); } } Getopt::Long::Configure("bundling","require_order"); get_options_from_array(\@ARGV_profile) || die_usage(); get_options_from_array(\@ARGV_env) || die_usage(); get_options_from_array(\@ARGV) || die_usage(); # What were the options given on the command line? 
# Used to start --sqlworker my $ai = arrayindex(\@ARGV_orig, \@ARGV); @Global::options_in_argv = @ARGV_orig[0..$ai-1]; # Prepend non-options to @ARGV (such as commands like 'nice') unshift @ARGV, @ARGV_profile, @ARGV_env; return @ARGV; } sub arrayindex { # Similar to Perl's index function, but for arrays # Input: # $arr_ref1 = ref to @array1 to search in # $arr_ref2 = ref to @array2 to search for my ($arr_ref1,$arr_ref2) = @_; my $array1_as_string = join "", map { "\257\257".$_ } @$arr_ref1; my $array2_as_string = join "", map { "\257\257".$_ } @$arr_ref2; my $i = index($array1_as_string,$array2_as_string,0); if($i == -1) { return -1 } my @before = split /\257\257/, substr($array1_as_string,0,$i); return $#before; } sub read_args_from_command_line { # Arguments given on the command line after: # ::: ($Global::arg_sep) # :::: ($Global::arg_file_sep) # Removes the arguments from @ARGV and: # - puts filenames into -a # - puts arguments into files and add the files to -a # Input: # @::ARGV = command option ::: arg arg arg :::: argfiles # Uses: # $Global::arg_sep # $Global::arg_file_sep # $opt::internal_pipe_means_argfiles # $opt::pipe # @opt::a # Returns: # @argv_no_argsep = @::ARGV without ::: and :::: and following args my @new_argv = (); for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) { if($arg eq $Global::arg_sep or $arg eq $Global::arg_file_sep) { my $group = $arg; # This group of arguments is args or argfiles my @group; while(defined ($arg = shift @ARGV)) { if($arg eq $Global::arg_sep or $arg eq $Global::arg_file_sep) { # exit while loop if finding new separator last; } else { # If not hitting ::: or :::: # Append it to the group push @group, $arg; } } if($group eq $Global::arg_file_sep or ($opt::internal_pipe_means_argfiles and $opt::pipe) ) { # Group of file names on the command line. # Append args into -a push @opt::a, @group; } elsif($group eq $Global::arg_sep) { # Group of arguments on the command line. # Put them into a file. # Create argfile my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); unlink($name); # Put args into argfile print $outfh map { $_,$/ } @group; seek $outfh, 0, 0; exit_if_disk_full(); # Append filehandle to -a push @opt::a, $outfh; } else { ::die_bug("Unknown command line group: $group"); } if(defined($arg)) { # $arg is ::: or :::: redo; } else { # $arg is undef -> @ARGV empty last; } } push @new_argv, $arg; } # Output: @ARGV = command to run with options return @new_argv; } sub cleanup { # Returns: N/A unlink keys %Global::unlink; map { rmdir $_ } keys %Global::unlink; if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } } sub __QUOTING_ARGUMENTS_FOR_SHELL__ {} sub shell_quote { # Input: # @strings = strings to be quoted # Output: # @shell_quoted_strings = string quoted with \ as needed by the shell return wantarray ? 
(map { shell_quote_scalar($_) } @_) : (join" ",map { shell_quote_scalar($_) } @_); } sub shell_quote_scalar_rc { # Quote for the rc-shell my $a = $_[0]; if(defined $a) { if(($a =~ s/'/''/g) + ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) { # A string was replaced # No need to test for "" or \0 } elsif($a eq "") { $a = "''"; } elsif($a eq "\0") { $a = ""; } } return $a; } sub shell_quote_scalar_csh { # Quote for (t)csh my $a = $_[0]; if(defined $a) { # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; # This is 1% faster than the above if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + # quote newline in csh as \\\n ($a =~ s/[\n]/"\\\n"/go)) { # A string was replaced # No need to test for "" or \0 } elsif($a eq "") { $a = "''"; } elsif($a eq "\0") { $a = ""; } } return $a; } sub shell_quote_scalar_default { # Quote for other shells my $a = $_[0]; if(defined $a) { # zsh wants '=' quoted # Solaris sh wants ^ quoted. # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; # This is 1% faster than the above if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + # quote newline as '\n' ($a =~ s/[\n]/'\n'/go)) { # A string was replaced # No need to test for "" or \0 } elsif($a eq "") { $a = "''"; } elsif($a eq "\0") { $a = ""; } } return $a; } sub shell_quote_scalar { # Quote the string so the shell will not expand any special chars # Inputs: # $string = string to be quoted # Returns: # $shell_quoted = string quoted as needed by the shell # Speed optimization: Choose the correct shell_quote_scalar_* # and call that directly from now on no warnings 'redefine'; if($Global::shell =~ m:(^|/)t?csh$:) { # (t)csh *shell_quote_scalar = \&shell_quote_scalar_csh; } elsif($Global::shell =~ m:(^|/)rc$:) { # rc-shell *shell_quote_scalar = \&shell_quote_scalar_rc; } else { # other shells *shell_quote_scalar = \&shell_quote_scalar_default; } # The sub is now redefined. Call it return shell_quote_scalar(@_); } sub shell_quote_file { # Quote the string so shell will not expand any special chars and prepend ./ if needed # Input: # $filename = filename to be shell quoted # Returns: # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed my $a = shell_quote_scalar(shift); if(defined $a) { if($a =~ m:^/: or $a =~ m:^\./:) { # /abs/path or ./rel/path => skip } else { # rel/path => ./rel/path $a = "./".$a; } } return $a; } sub shellwords { # Input: # $string = shell line # Returns: # @shell_words = $string split into words as shell would do $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; return Text::ParseWords::shellwords(@_); } sub perl_quote_scalar { # Quote the string so perl's eval will not expand any special chars # Inputs: # $string = string to be quoted # Returns: # $shell_quoted = string quoted with \ as needed by perl's eval my $a = $_[0]; if(defined $a) { $a =~ s/[\\\"\$\@]/\\$&/go; } return $a; } sub unquote_printf { # Convert \t \n \r \000 \0 $_ = shift; s/\\t/\t/g; s/\\n/\n/g; s/\\r/\r/g; s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge; s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge; return $_; } sub __FILEHANDLES__ {} sub save_stdin_stdout_stderr { # Remember the original STDIN, STDOUT and STDERR # and file descriptors opened by the shell (e.g. 
3>/tmp/foo) # Uses: # %Global::fd # $Global::original_stderr # $Global::original_stdin # Returns: N/A # Find file descriptors that are already opened (by the shell) for my $fdno (1..61) { # /dev/fd/62 and above are used by bash for <(cmd) my $fh; # 2-argument-open is used to be compatible with old perl 5.8.0 # bug #43570: Perl 5.8.0 creates 61 files if(open($fh,">&=$fdno")) { $Global::fd{$fdno}=$fh; } } open $Global::original_stderr, ">&", "STDERR" or ::die_bug("Can't dup STDERR: $!"); open $Global::status_fd, ">&", "STDERR" or ::die_bug("Can't dup STDERR: $!"); open $Global::original_stdin, "<&", "STDIN" or ::die_bug("Can't dup STDIN: $!"); } sub enough_file_handles { # Check that we have enough filehandles available for starting # another job # Uses: # $opt::ungroup # %Global::fd # Returns: # 1 if ungrouped (thus not needing extra filehandles) # 0 if too few filehandles # 1 if enough filehandles if(not $opt::ungroup) { my %fh; my $enough_filehandles = 1; # perl uses 7 filehandles for something? # open3 uses 2 extra filehandles temporarily # We need a filehandle for each redirected file descriptor # (normally just STDOUT and STDERR) for my $i (1..(7+2+keys %Global::fd)) { $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); } for (values %fh) { close $_; } return $enough_filehandles; } else { # Ungrouped does not need extra file handles return 1; } } sub open_or_exit { # Open a file name or exit if the file cannot be opened # Inputs: # $file = filehandle or filename to open # Uses: # $Global::stdin_in_opt_a # $Global::original_stdin # Returns: # $fh = file handle to read-opened file my $file = shift; if($file eq "-") { $Global::stdin_in_opt_a = 1; return ($Global::original_stdin || *STDIN); } if(ref $file eq "GLOB") { # This is an open filehandle return $file; } my $fh = gensym; if(not open($fh, "<", $file)) { ::error("Cannot open input file `$file': No such file or directory."); wait_and_exit(255); } return $fh; } sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {} # Variable structure: # # $Global::running{$pid} = Pointer to Job-object # @Global::virgin_jobs = Pointer to Job-object that have received no input # $Global::host{$sshlogin} = Pointer to SSHLogin-object # $Global::total_running = total number of running jobs # $Global::total_started = total jobs started # $Global::tty_taken = is the tty in use by a running job? # $Global::max_procs_file = filename if --jobs is given a filename # $Global::JobQueue = JobQueue object for the queue of jobs # $Global::timeoutq = queue of times where jobs timeout # $Global::newest_job = Job object of the most recent job started # $Global::newest_starttime = timestamp of $Global::newest_job # @Global::sshlogin # $Global::minimal_command_line_length = minimum length supported by all sshlogins # $Global::start_no_new_jobs = should more jobs be started? # $Global::original_stderr = file handle for STDERR when the program started # $Global::total_started = total number of jobs started # $Global::envvar = string to set the shell environment variables # $Global::joblog = filehandle of joblog # $Global::debug = Is debugging on? 
# $Global::exitstatus = status code of GNU Parallel # $Global::quoting = quote the command to run sub init_run_jobs { # Set Global variables and progress signal handlers # Do the copying of basefiles # Returns: N/A $Global::total_running = 0; $Global::total_started = 0; $Global::tty_taken = 0; $SIG{USR1} = \&list_running_jobs; $SIG{USR2} = \&toggle_progress; if(@opt::basefile) { setup_basefile(); } } { my $last_time; my %last_mtime; my $max_procs_file_last_mod; sub changed_procs_file { # If --jobs is a file and it is modfied: # Force recomputing of max_jobs_running for each $sshlogin # Uses: # $Global::max_procs_file # %Global::host # Returns: N/A if($Global::max_procs_file) { # --jobs filename my $mtime = (stat($Global::max_procs_file))[9]; $max_procs_file_last_mod ||= 0; if($mtime > $max_procs_file_last_mod) { # file changed: Force re-computing max_jobs_running $max_procs_file_last_mod = $mtime; for my $sshlogin (values %Global::host) { $sshlogin->set_max_jobs_running(undef); } } } } sub changed_sshloginfile { # If --slf is changed: # reload --slf # filter_hosts # setup_basefile # Uses: # @opt::sshloginfile # @Global::sshlogin # %Global::host # $opt::filter_hosts # Returns: N/A if(@opt::sshloginfile) { # Is --sshloginfile changed? for my $slf (@opt::sshloginfile) { my $actual_file = expand_slf_shorthand($slf); my $mtime = (stat($actual_file))[9]; $last_mtime{$actual_file} ||= $mtime; if($mtime - $last_mtime{$actual_file} > 1) { ::debug("run","--sshloginfile $actual_file changed. reload\n"); $last_mtime{$actual_file} = $mtime; # Reload $slf # Empty sshlogins @Global::sshlogin = (); for (values %Global::host) { # Don't start new jobs on any host # except the ones added back later $_->set_max_jobs_running(0); } # This will set max_jobs_running on the SSHlogins read_sshloginfile($actual_file); parse_sshlogin(); $opt::filter_hosts and filter_hosts(); setup_basefile(); } } } } sub start_more_jobs { # Run start_another_job() but only if: # * not $Global::start_no_new_jobs set # * not JobQueue is empty # * not load on server is too high # * not server swapping # * not too short time since last remote login # Uses: # %Global::host # $Global::start_no_new_jobs # $Global::JobQueue # $opt::pipe # $opt::load # $opt::noswap # $opt::delay # $Global::newest_starttime # Returns: # $jobs_started = number of jobs started my $jobs_started = 0; my $jobs_started_this_round = 0; if($Global::start_no_new_jobs) { return $jobs_started; } if(time - ($last_time||0) > 1) { # At most do this every second $last_time = time; changed_procs_file(); changed_sshloginfile(); } do { $jobs_started_this_round = 0; # This will start 1 job on each --sshlogin (if possible) # thus distribute the jobs on the --sshlogins round robin for my $sshlogin (values %Global::host) { if($Global::JobQueue->empty() and not $opt::pipe) { # No more jobs in the queue last; } debug("run", "Running jobs before on ", $sshlogin->string(), ": ", $sshlogin->jobs_running(), "\n"); if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) { # It has been too short since last start next; } if($opt::load and $sshlogin->loadavg_too_high()) { # The load is too high or unknown next; } if($opt::noswap and $sshlogin->swapping()) { # The server is swapping next; } if($opt::memfree and $sshlogin->memfree() < $opt::memfree) { # The server has not enough mem free ::debug("mem", "Not starting job: not enough mem\n"); next; } if($sshlogin->too_fast_remote_login()) { # It has been too short 
since next; } debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(), " out of ", $sshlogin->max_jobs_running(), " jobs running. Start another.\n"); if(start_another_job($sshlogin) == 0) { # No more jobs to start on this $sshlogin debug("run","No jobs started on ", $sshlogin->string(), "\n"); next; } $sshlogin->inc_jobs_running(); $sshlogin->set_last_login_at(::now()); $jobs_started++; $jobs_started_this_round++; } debug("run","Running jobs after on ", $sshlogin->string(), ": ", $sshlogin->jobs_running(), " of ", $sshlogin->max_jobs_running(), "\n"); } } while($jobs_started_this_round); return $jobs_started; } } { my $no_more_file_handles_warned; sub start_another_job { # If there are enough filehandles # and JobQueue not empty # and not $job is in joblog # Then grab a job from Global::JobQueue, # start it at sshlogin # mark it as virgin_job # Inputs: # $sshlogin = the SSHLogin to start the job on # Uses: # $Global::JobQueue # $opt::pipe # $opt::results # $opt::resume # @Global::virgin_jobs # Returns: # 1 if another jobs was started # 0 otherwise my $sshlogin = shift; # Do we have enough file handles to start another job? if(enough_file_handles()) { if($Global::JobQueue->empty() and not $opt::pipe) { # No more commands to run debug("start", "Not starting: JobQueue empty\n"); return 0; } else { my $job; # Skip jobs already in job log # Skip jobs already in results do { $job = get_job_with_sshlogin($sshlogin); if(not defined $job) { # No command available for that sshlogin debug("start", "Not starting: no jobs available for ", $sshlogin->string(), "\n"); return 0; } } while ($job->is_already_in_joblog() or ($opt::results and $opt::resume and $job->is_already_in_results())); debug("start", "Command to run on '", $job->sshlogin()->string(), "': '", $job->replaced(),"'\n"); if($job->start()) { if($opt::pipe) { if($job->virgin()) { push(@Global::virgin_jobs,$job); } else { # Block already set: This is a retry if(fork()) { ::debug("pipe","\n\nWriting ",length ${$job->block_ref()}, " to ", $job->seq(),"\n"); close $job->fh(0,"w"); } else { $job->write($job->block_ref()); close $job->fh(0,"w"); exit(0); } } } debug("start", "Started as seq ", $job->seq(), " pid:", $job->pid(), "\n"); return 1; } else { # Not enough processes to run the job. # Put it back on the queue. $Global::JobQueue->unget($job); # Count down the number of jobs to run for this SSHLogin. my $max = $sshlogin->max_jobs_running(); if($max > 1) { $max--; } else { my @arg; for my $record (@{$job->{'commandline'}->{'arg_list'}}) { push @arg, map { $_->orig() } @$record; } ::error("No more processes: cannot run a single job. Something is wrong at @arg."); ::wait_and_exit(255); } $sshlogin->set_max_jobs_running($max); # Sleep up to 300 ms to give other processes time to die ::usleep(rand()*300); ::warning("No more processes: ". "Decreasing number of running jobs to $max.", "Raising ulimit -u or /etc/security/limits.conf may help."); return 0; } } } else { # No more file handles $no_more_file_handles_warned++ or ::warning("No more file handles. 
", "Raising ulimit -n or /etc/security/limits.conf may help."); return 0; } } } sub init_progress { # Uses: # $opt::bar # Returns: # list of computers for progress output $|=1; if($opt::bar) { return("",""); } my %progress = progress(); return ("\nComputers / CPU cores / Max jobs to run\n", $progress{'workerlist'}); } sub drain_job_queue { # Uses: # $opt::progress # $Global::total_running # $Global::max_jobs_running # %Global::running # $Global::JobQueue # %Global::host # $Global::start_no_new_jobs # Returns: N/A if($opt::progress) { ::status(init_progress()); } my $last_header = ""; my $sleep = 0.2; do { while($Global::total_running > 0) { debug($Global::total_running, "==", scalar keys %Global::running," slots: ", $Global::max_jobs_running); if($opt::pipe) { # When using --pipe sometimes file handles are not closed properly for my $job (values %Global::running) { close $job->fh(0,"w"); } } if($opt::progress) { my %progress = progress(); if($last_header ne $progress{'header'}) { ::status("\n", $progress{'header'}, "\n"); $last_header = $progress{'header'}; } ::status("\r",$progress{'status'}); } if($Global::total_running < $Global::max_jobs_running and not $Global::JobQueue->empty()) { # These jobs may not be started because of loadavg # or too little time between each ssh login. if(start_more_jobs() > 0) { # Exponential back-on if jobs were started $sleep = $sleep/2+0.001; } } # Exponential back-off sleeping $sleep = ::reap_usleep($sleep); } if(not $Global::JobQueue->empty()) { # These jobs may not be started: # * because there the --filter-hosts has removed all if(not %Global::host) { ::error("There are no hosts left to run on."); ::wait_and_exit(255); } # * because of loadavg # * because of too little time between each ssh login. start_more_jobs(); $sleep = ::reap_usleep($sleep); if($Global::max_jobs_running == 0) { ::warning("There are no job slots available. Increase --jobs."); } } while($opt::sql and not $Global::sql->finished()) { # SQL master $sleep = ::reap_usleep($sleep); if($Global::sqlworker) { # Start an SQL worker as we are now sure there is work to do $Global::sqlworker = 0; if(fork()) { # skip } else { # Replace --sql/--sqlandworker with --sqlworker my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv; # exec the --sqlworker exec($0,::shell_quote(@ARGV),@command); } } } } while ($Global::total_running > 0 or not $Global::start_no_new_jobs and not $Global::JobQueue->empty() or $opt::sql and not $Global::sql->finished()); if($opt::progress) { my %progress = progress(); ::status("\r", $progress{'status'}, "\n"); } } sub toggle_progress { # Turn on/off progress view # Uses: # $opt::progress # Returns: N/A $opt::progress = not $opt::progress; if($opt::progress) { ::status(init_progress()); } } sub progress { # Uses: # $opt::bar # $opt::eta # %Global::host # $Global::total_started # Returns: # $workerlist = list of workers # $header = that will fit on the screen # $status = message that will fit on the screen if($opt::bar) { return ("workerlist" => "", "header" => "", "status" => bar()); } my $eta = ""; my ($status,$header)=("",""); if($opt::eta) { my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = compute_eta(); $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", $this_eta, $left, $avgtime); } my $termcols = terminal_columns(); my @workers = sort keys %Global::host; my %sshlogin = map { $_ eq ":" ? 
($_=>"local") : ($_=>$_) } @workers; my $workerno = 1; my %workerno = map { ($_=>$workerno++) } @workers; my $workerlist = ""; for my $w (@workers) { $workerlist .= $workerno{$w}.":".$sshlogin{$w} ." / ". ($Global::host{$w}->ncpus() || "-")." / ". $Global::host{$w}->max_jobs_running()."\n"; } $status = "x"x($termcols+1); # Select an output format that will fit on a single line if(length $status > $termcols) { # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete"; $status = $eta . join(" ",map { if($Global::total_started) { my $completed = ($Global::host{$_}->jobs_completed()||0); my $running = $Global::host{$_}->jobs_running(); my $time = $completed ? (time-$^T)/($completed) : "0"; sprintf("%s:%d/%d/%d%%/%.1fs ", $sshlogin{$_}, $running, $completed, ($running+$completed)*100 / $Global::total_started, $time); } } @workers); } if(length $status > $termcols) { # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs $header = "Computer:jobs running/jobs completed/%of started jobs"; $status = $eta . join(" ",map { my $completed = ($Global::host{$_}->jobs_completed()||0); my $running = $Global::host{$_}->jobs_running(); my $time = $completed ? (time-$^T)/($completed) : "0"; sprintf("%s:%d/%d/%d%%/%.1fs ", $workerno{$_}, $running, $completed, ($running+$completed)*100 / $Global::total_started, $time); } @workers); } if(length $status > $termcols) { # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% $header = "Computer:jobs running/jobs completed/%of started jobs"; $status = $eta . join(" ",map { sprintf("%s:%d/%d/%d%%", $sshlogin{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0), ($Global::host{$_}->jobs_running()+ ($Global::host{$_}->jobs_completed()||0))*100 / $Global::total_started) } @workers); } if(length $status > $termcols) { # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX% $header = "Computer:jobs running/jobs completed/%of started jobs"; $status = $eta . join(" ",map { sprintf("%s:%d/%d/%d%%", $workerno{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0), ($Global::host{$_}->jobs_running()+ ($Global::host{$_}->jobs_completed()||0))*100 / $Global::total_started) } @workers); } if(length $status > $termcols) { # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX $header = "Computer:jobs running/jobs completed"; $status = $eta . join(" ",map { sprintf("%s:%d/%d", $sshlogin{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0)) } @workers); } if(length $status > $termcols) { # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX $header = "Computer:jobs running/jobs completed"; $status = $eta . join(" ",map { sprintf("%s:%d/%d", $sshlogin{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0)) } @workers); } if(length $status > $termcols) { # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX $header = "Computer:jobs running/jobs completed"; $status = $eta . join(" ",map { sprintf("%s:%d/%d", $workerno{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0)) } @workers); } if(length $status > $termcols) { # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX $header = "Computer:jobs completed"; $status = $eta . 
join(" ",map { sprintf("%s:%d", $sshlogin{$_}, ($Global::host{$_}->jobs_completed()||0)) } @workers); } if(length $status > $termcols) { # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX $header = "Computer:jobs completed"; $status = $eta . join(" ",map { sprintf("%s:%d", $workerno{$_}, ($Global::host{$_}->jobs_completed()||0)) } @workers); } return ("workerlist" => $workerlist, "header" => $header, "status" => $status); } { my ($total, $first_completed, $smoothed_avg_time); sub compute_eta { # Calculate important numbers for ETA # Returns: # $total = number of jobs in total # $completed = number of jobs completed # $left = number of jobs left # $pctcomplete = percent of jobs completed # $avgtime = averaged time # $eta = smoothed eta $total ||= $Global::JobQueue->total_jobs(); my $completed = $Global::total_completed; my $left = $total - $completed; if(not $completed) { return($total, $completed, $left, 0, 0, 0); } my $pctcomplete = $completed / $total; $first_completed ||= time; my $timepassed = (time - $first_completed); my $avgtime = $timepassed / $completed; $smoothed_avg_time ||= $avgtime; # Smooth the eta so it does not jump wildly $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + $pctcomplete * $avgtime; my $eta = int($left * $smoothed_avg_time); return($total, $completed, $left, $pctcomplete, $avgtime, $eta); } } { my ($rev,$reset); sub bar { # Return: # $status = bar with eta, completed jobs, arg and pct $rev ||= "\033[7m"; $reset ||= "\033[0m"; my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = compute_eta(); my $arg = $Global::newest_job ? $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; # These chars mess up display in the terminal $arg =~ tr/[\011-\016\033\302-\365]//d; my $bar_text = sprintf("%d%% %d:%d=%ds %s", $pctcomplete*100, $completed, $left, $eta, $arg); my $terminal_width = terminal_columns(); my $s = sprintf("%-${terminal_width}s", substr($bar_text." "x$terminal_width, 0,$terminal_width)); my $width = int($terminal_width * $pctcomplete); substr($s,$width,0) = $reset; my $zenity = sprintf("%-${terminal_width}s", substr("# $eta sec $arg", 0,$terminal_width)); $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header "\r" . $rev . $s . $reset; return $s; } } { my ($columns,$last_column_time); sub terminal_columns { # Get the number of columns of the terminal. # Only update once per second. # Returns: # number of columns of the screen if(not $columns or $last_column_time < time) { $last_column_time = time; $columns = $ENV{'COLUMNS'}; if(not $columns) { my $stty = ::qqx("stty -a </dev/tty"); # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS # MacOSX/IRIX/AIX/Tru64 $stty =~ /(\d+) columns/ and do { $columns = $1; }; # GNU/Linux/Solaris $stty =~ /columns (\d+)/ and do { $columns = $1; }; # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana $stty =~ /columns = (\d+)/ and do { $columns = $1; }; # QNX $stty =~ /rows=\d+,(\d+)/ and do { $columns = $1; }; } if(not $columns) { my $resize = ::qqx("resize"); $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; }; } $columns ||= 80; } return $columns; } } sub get_job_with_sshlogin { # Input: # $sshlogin = which host should the job be run on? 
# Uses: # $opt::hostgroups # $Global::JobQueue # Returns: # $job = next job object for $sshlogin if any available my $sshlogin = shift; my $job; if ($opt::hostgroups) { my @other_hostgroup_jobs = (); while($job = $Global::JobQueue->get()) { if($sshlogin->in_hostgroups($job->hostgroups())) { # Found a job to be run on a hostgroup of this # $sshlogin last; } else { # This job was not in the hostgroups of $sshlogin push @other_hostgroup_jobs, $job; } } $Global::JobQueue->unget(@other_hostgroup_jobs); if(not defined $job) { # No more jobs return undef; } } else { $job = $Global::JobQueue->get(); if(not defined $job) { # No more jobs ::debug("start", "No more jobs: JobQueue empty\n"); return undef; } } my $clean_command = $job->replaced(); if($clean_command =~ /^\s*$/) { # Do not run empty lines if(not $Global::JobQueue->empty()) { return get_job_with_sshlogin($sshlogin); } else { return undef; } } $job->set_sshlogin($sshlogin); if($opt::retries and $clean_command and $job->failed_here()) { # This command with these args failed for this sshlogin my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); # Only look at the Global::host that have > 0 jobslots if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host and $job->failed_here() == $min_failures) { # It failed the same or more times on another host: # run it on this host } else { # If it failed fewer times on another host: # Find another job to run my $nextjob; if(not $Global::JobQueue->empty()) { # This can potentially recurse for all args no warnings 'recursion'; $nextjob = get_job_with_sshlogin($sshlogin); } # Push the command back on the queue $Global::JobQueue->unget($job); return $nextjob; } } return $job; } sub __REMOTE_SSH__ {} sub read_sshloginfiles { # Read a list of --slf's # Input: # @files = files or symbolic file names to read # Returns: N/A for my $s (@_) { read_sshloginfile(expand_slf_shorthand($s)); } } sub expand_slf_shorthand { # Expand --slf shorthand into a read file name # Input: # $file = file or symbolic file name to read # Returns: # $file = actual file name to read my $file = shift; if($file eq "-") { # skip: It is stdin } elsif($file eq "..") { $file = $ENV{'HOME'}."/.parallel/sshloginfile"; } elsif($file eq ".") { $file = "/etc/parallel/sshloginfile"; } elsif(not -r $file) { if(not -r $ENV{'HOME'}."/.parallel/".$file) { # Try prepending ~/.parallel ::error("Cannot open $file."); ::wait_and_exit(255); } else { $file = $ENV{'HOME'}."/.parallel/".$file; } } return $file; } sub read_sshloginfile { # Read sshloginfile into @Global::sshlogin # Input: # $file = file to read # Uses: # @Global::sshlogin # Returns: N/A my $file = shift; my $close = 1; my $in_fh; ::debug("init","--slf ",$file); if($file eq "-") { $in_fh = *STDIN; $close = 0; } else { if(not open($in_fh, "<", $file)) { # Try the filename ::error("Cannot open $file."); ::wait_and_exit(255); } } while(<$in_fh>) { chomp; /^\s*#/ and next; /^\s*$/ and next; push @Global::sshlogin, $_; } if($close) { close $in_fh; } } sub parse_sshlogin { # Parse @Global::sshlogin into %Global::host. # Keep only hosts that are in one of the given ssh hostgroups. # Uses: # @Global::sshlogin # $Global::minimal_command_line_length # %Global::host # $opt::transfer # @opt::return # $opt::cleanup # @opt::basefile # @opt::trc # Returns: N/A my @login; if(not @Global::sshlogin) { @Global::sshlogin = (":"); } for my $sshlogin (@Global::sshlogin) { # Split up -S sshlogin,sshlogin for my $s (split /,|\n/, $sshlogin) { if ($s eq ".." 
or $s eq "-") { # This may add to @Global::sshlogin - possibly bug read_sshloginfile(expand_slf_shorthand($s)); } else { $s =~ s/\s*$//; push (@login, $s); } } } $Global::minimal_command_line_length = 8_000_000; my @allowed_hostgroups; for my $ncpu_sshlogin_string (::uniq(@login)) { my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); my $sshlogin_string = $sshlogin->string(); if($sshlogin_string eq "") { # This is an ssh group: -S @webservers push @allowed_hostgroups, $sshlogin->hostgroups(); next; } if($Global::host{$sshlogin_string}) { # This sshlogin has already been added: # It is probably a host that has come back # Set the max_jobs_running back to the original debug("run","Already seen $sshlogin_string\n"); if($sshlogin->{'ncpus'}) { # If ncpus set by '#/' of the sshlogin, overwrite it: $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); } $Global::host{$sshlogin_string}->set_max_jobs_running(undef); next; } if($sshlogin_string eq ":") { $sshlogin->set_maxlength(Limits::Command::max_length()); } else { # If all chars needs to be quoted, every other character will be \ $sshlogin->set_maxlength(int(Limits::Command::max_length()/2)); } $Global::minimal_command_line_length = ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); $Global::host{$sshlogin_string} = $sshlogin; } if(@allowed_hostgroups) { # Remove hosts that are not in these groups while (my ($string, $sshlogin) = each %Global::host) { if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { delete $Global::host{$string}; } } } # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); if(@Global::transfer_files or @opt::return or $opt::cleanup or @opt::basefile) { if(not remote_hosts()) { # There are no remote hosts if(@opt::trc) { ::warning("--trc ignored as there are no remote --sshlogin."); } elsif (defined $opt::transfer) { ::warning("--transfer ignored as there are no remote --sshlogin."); } elsif (@opt::transfer_files) { ::warning("--transferfile ignored as there are no remote --sshlogin."); } elsif (@opt::return) { ::warning("--return ignored as there are no remote --sshlogin."); } elsif (defined $opt::cleanup) { ::warning("--cleanup ignored as there are no remote --sshlogin."); } elsif (@opt::basefile) { ::warning("--basefile ignored as there are no remote --sshlogin."); } } } } sub remote_hosts { # Return sshlogins that are not ':' # Uses: # %Global::host # Returns: # list of sshlogins with ':' removed return grep !/^:$/, keys %Global::host; } sub setup_basefile { # Transfer basefiles to each $sshlogin # This needs to be done before first jobs on $sshlogin is run # Uses: # %Global::host # @opt::basefile # Returns: N/A my $cmd = ""; my $rsync_destdir; my $workdir; for my $sshlogin (values %Global::host) { if($sshlogin->string() eq ":") { next } for my $file (@opt::basefile) { if($file !~ m:^/: and $opt::workdir eq "...") { ::error("Work dir '...' will not work with relative basefiles."); ::wait_and_exit(255); } $workdir ||= Job->new("")->workdir(); $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . 
"&"; } } $cmd .= "wait;"; debug("init", "basesetup: $cmd\n"); print `$cmd`; } sub cleanup_basefile { # Remove the basefiles transferred # Uses: # %Global::host # @opt::basefile # Returns: N/A my $cmd = ""; my $workdir = Job->new("")->workdir(); for my $sshlogin (values %Global::host) { if($sshlogin->string() eq ":") { next } for my $file (@opt::basefile) { $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&"; } } $cmd .= "wait;"; debug("init", "basecleanup: $cmd\n"); print `$cmd`; } sub filter_hosts { # Remove down --sshlogins from active duty. # Find ncpus, ncores, maxlen, time-to-login for each host. # Uses: # %Global::host # $Global::minimal_command_line_length # $opt::use_cpus_instead_of_cores # Returns: N/A my ($ncores_ref, $ncpus_ref, $time_to_login_ref, $maxlen_ref, $echo_ref, $down_hosts_ref) = parse_host_filtering(parallelized_host_filtering()); delete @Global::host{@$down_hosts_ref}; @$down_hosts_ref and ::warning("Removed @$down_hosts_ref."); $Global::minimal_command_line_length = 8_000_000; while (my ($sshlogin, $obj) = each %Global::host) { if($sshlogin eq ":") { next } $ncpus_ref->{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); $ncores_ref->{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); $time_to_login_ref->{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); $maxlen_ref->{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); if($opt::use_cpus_instead_of_cores) { $obj->set_ncpus($ncpus_ref->{$sshlogin}); } else { $obj->set_ncpus($ncores_ref->{$sshlogin}); } $obj->set_time_to_login($time_to_login_ref->{$sshlogin}); $obj->set_maxlength($maxlen_ref->{$sshlogin}); $Global::minimal_command_line_length = ::min($Global::minimal_command_line_length, int($maxlen_ref->{$sshlogin}/2)); ::debug("init", "Timing from -S:$sshlogin ", " ncpus:",$ncpus_ref->{$sshlogin}, " ncores:", $ncores_ref->{$sshlogin}, " time_to_login:", $time_to_login_ref->{$sshlogin}, " maxlen:", $maxlen_ref->{$sshlogin}, " min_max_len:", $Global::minimal_command_line_length,"\n"); } } sub parse_host_filtering { # Input: # @lines = output from parallelized_host_filtering() # Returns: # \%ncores = number of cores of {host} # \%ncpus = number of cpus of {host} # \%time_to_login = time_to_login on {host} # \%maxlen = max command len on {host} # \%echo = echo received from {host} # \@down_hosts = list of hosts with no answer my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); for (@_) { ::debug("init",$_); chomp; my @col = split /\t/, $_; if(defined $col[6]) { # This is a line from --joblog # seq host time spent sent received exit signal command # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores if($col[0] eq "Seq" and $col[1] eq "Host" and $col[2] eq "Starttime") { # Header => skip next; } # Get server from: eval true server\; $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); my $host = $1; $host =~ tr/\\//d; $Global::host{$host} or next; if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") { # exit == 255 or exit == timeout (-1): ssh failed/timedout # exit == 1: lsh failed # Remove sshlogin ::debug("init", "--filtered $host\n"); push(@down_hosts, $host); } elsif($col[6] eq "127") { # signal == 127: parallel not installed remote # Set ncpus and ncores = 1 ::warning("Could not figure out ", "number of cpus on $host. 
Using 1."); $ncores{$host} = 1; $ncpus{$host} = 1; $maxlen{$host} = Limits::Command::max_length(); } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { # Remember how log it took to log in # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); } else { ::die_bug("host check unmatched long jobline: $_"); } } elsif($Global::host{$col[0]}) { # This output from --number-of-cores, --number-of-cpus, # --max-line-length-allowed # ncores: server 8 # ncpus: server 2 # maxlen: server 131071 if(not $ncores{$col[0]}) { $ncores{$col[0]} = $col[1]; } elsif(not $ncpus{$col[0]}) { $ncpus{$col[0]} = $col[1]; } elsif(not $maxlen{$col[0]}) { $maxlen{$col[0]} = $col[1]; } elsif(not $echo{$col[0]}) { $echo{$col[0]} = $col[1]; } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { # Skip these: # perl: warning: Setting locale failed. # perl: warning: Please check that your locale settings: # LANGUAGE = (unset), # LC_ALL = (unset), # LANG = "en_US.UTF-8" # are supported and installed on your system. # perl: warning: Falling back to the standard locale ("C"). } else { ::die_bug("host check too many col0: $_"); } } else { ::die_bug("host check unmatched short jobline ($col[0]): $_"); } } @down_hosts = uniq(@down_hosts); return(\%ncores, \%ncpus, \%time_to_login, \%maxlen, \%echo, \@down_hosts); } sub parallelized_host_filtering { # Uses: # $Global::envvar # %Global::host # Returns: # text entries with: # * joblog line # * hostname \t number of cores # * hostname \t number of cpus # * hostname \t max-line-length-allowed # * hostname \t empty my(@cores, @cpus, @maxline, @echo); my $envvar = ::shell_quote_scalar($Global::envvar); while (my ($host, $sshlogin) = each %Global::host) { if($host eq ":") { next } # The 'true' is used to get the $host out later my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ". $sshlogin->serverlogin(); push(@cores, $host."\t".$sshcmd." -- ".$envvar. " parallel --number-of-cores\n\0"); push(@cpus, $host."\t".$sshcmd." -- ".$envvar. " parallel --number-of-cpus\n\0"); push(@maxline, $host."\t".$sshcmd." -- ".$envvar. " parallel --max-line-length-allowed\n\0"); # 'echo' is used to get the best possible value for an ssh login time push(@echo, $host."\t".$sshcmd." -- echo\n\0"); } my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh"); print $fh @cores, @cpus, @maxline, @echo; close $fh; # --timeout 10: Setting up an SSH connection and running a simple # command should never take > 10 sec. # --delay 0.1: If multiple sshlogins use the same proxy the delay # will make it less likely to overload the ssh daemon. # --retries 3: If the ssh daemon it overloaded, try 3 times # -s 16000: Half of the max line on UnixWare # TODO sh -c wrapper to work in csh my $unlinkcmd = $Global::debug ? "true" : "rm $tmpfile"; my $cmd = "($unlinkcmd; cat -) < $tmpfile | ". "$0 -j0 --timeout 10 -s 16000 --joblog - --plain --delay 0.1 --retries 3 ". "--tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; ::debug("init", $cmd, "\n"); my @out; my $prepend = ""; open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); for(<$host_fh>) { if(/\'$/) { # if last char = ' then append next line # This may be due to quoting of $Global::envvar $prepend .= $_; next; } $_ = $prepend . $_; $prepend = ""; push @out, $_; } close $host_fh; return @out; } sub onall { # Runs @command on all hosts. # Uses parallel to run @command on each host. # --jobs = number of hosts to run on simultaneously. 
# For each host a parallel command with the args will be running. # Uses: # $Global::quoting # @opt::basefile # $opt::jobs # $opt::linebuffer # $opt::ungroup # $opt::group # $opt::keeporder # $opt::D # $opt::plain # $opt::max_chars # $opt::linebuffer # $opt::files # $opt::colsep # $opt::timeout # $opt::plain # $opt::retries # $opt::max_chars # $opt::arg_sep # $opt::arg_file_sep # @opt::v # @opt::env # %Global::host # $Global::exitstatus # $Global::debug # $Global::joblog # $opt::tag # $opt::joblog # Input: # @command = command to run on all hosts # Returns: N/A sub tmp_joblog { # Input: # $joblog = filename of joblog - undef if none # Returns: # $tmpfile = temp file for joblog - undef if none my $joblog = shift; if(not defined $joblog) { return undef; } my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); close $fh; return $tmpfile; } my ($input_source_fh_ref,@command) = @_; if($Global::quoting) { @command = shell_quote(@command); } # Copy all @input_source_fh (-a and :::) into tempfiles my @argfiles = (); for my $fh (@$input_source_fh_ref) { my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D); print $outfh (<$fh>); close $outfh; push @argfiles, $name; } if(@opt::basefile) { setup_basefile(); } # for each sshlogin do: # parallel -S $sshlogin $command :::: @argfiles # # Pass some of the options to the sub-parallels, not all of them as # -P should only go to the first, and -S should not be copied at all. my $options = join(" ", ((defined $opt::D) ? "-D $opt::D" : ""), ((defined $opt::group) ? "-g" : ""), ((defined $opt::jobs) ? "-P $opt::jobs" : ""), ((defined $opt::keeporder) ? "--keeporder" : ""), ((defined $opt::linebuffer) ? "--linebuffer" : ""), ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), ((defined $opt::plain) ? "--plain" : ""), ((defined $opt::ungroup) ? "-u" : ""), ); my $suboptions = join(" ", ((defined $opt::D) ? "-D $opt::D" : ""), ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), ((defined $opt::files) ? "--files" : ""), ((defined $opt::group) ? "-g" : ""), ((defined $opt::cleanup) ? "--cleanup" : ""), ((defined $opt::keeporder) ? "--keeporder" : ""), ((defined $opt::linebuffer) ? "--linebuffer" : ""), ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), ((defined $opt::plain) ? "--plain" : ""), ((defined $opt::retries) ? "--retries ".$opt::retries : ""), ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), ((defined $opt::ungroup) ? "-u" : ""), ((defined $opt::workdir) ? "--wd ".$opt::workdir : ""), (@Global::transfer_files ? map { "--tf ".::shell_quote_scalar($_) } @Global::transfer_files : ""), (@Global::ret_files ? map { "--return ".::shell_quote_scalar($_) } @Global::ret_files : ""), (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), (@opt::v ? "-vv" : ""), ); ::debug("init", "| $0 $options\n"); open(my $parallel_fh, "|-", "$0 --will-cite -j0 $options") || ::die_bug("This does not run GNU Parallel: $0 $options"); my @joblogs; for my $host (sort keys %Global::host) { my $sshlogin = $Global::host{$host}; my $joblog = tmp_joblog($opt::joblog); if($joblog) { push @joblogs, $joblog; $joblog = "--joblog $joblog"; } my $quad = $opt::arg_file_sep || "::::"; ::debug("init", "$0 $suboptions -j1 $joblog ", ((defined $opt::tag) ? 
"--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), " -S ", shell_quote_scalar($sshlogin->string())," ", join(" ",shell_quote(@command))," $quad @argfiles\n"); print $parallel_fh "$0 $suboptions -j1 $joblog ", ((defined $opt::tag) ? "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), " -S ", shell_quote_scalar($sshlogin->string())," ", join(" ",shell_quote(@command))," $quad @argfiles\n"; } close $parallel_fh; $Global::exitstatus = $? >> 8; debug("init", "--onall exitvalue ", $?); if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } $Global::debug or unlink(@argfiles); my %seen; for my $joblog (@joblogs) { # Append to $joblog open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); # Skip first line (header); <$fh>; print $Global::joblog (<$fh>); close $fh; unlink($joblog); } } sub __SIGNAL_HANDLING__ {} sub tstp { # Send TSTP signal (Ctrl-Z) to all children process groups # Uses: # %SIG # Returns: N/A kill "TSTP", map { -$_ } keys %Global::running; # Use default signal handler to suspend GNU Parallel self $SIG{TSTP} = undef; kill "TSTP", $$; } sub save_original_signal_handler { # Remember the original signal handler # Uses: # %Global::original_sig # Returns: N/A $SIG{INT} = sub { if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } wait_and_exit(255); }; $SIG{TERM} = sub { if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } wait_and_exit(255); }; %Global::original_sig = %SIG; $SIG{TERM} = sub {}; # Dummy until jobs really start $SIG{ALRM} = 'IGNORE'; # Allow Ctrl-Z to suspend and `fg` to continue $SIG{TSTP} = \&tstp; $SIG{CONT} = sub { # Set $SIG{TSTP} again (it is undef'ed in tstp() ) $SIG{TSTP} = \&tstp; # Send continue signal to all children process groups kill "CONT", map { -$_ } keys %Global::running; }; } sub list_running_jobs { # Print running jobs on tty # Uses: # %Global::running # Returns: N/A for my $job (values %Global::running) { ::status("$Global::progname: ",$job->replaced(),"\n"); } } sub start_no_new_jobs { # Start no more jobs # Uses: # %Global::original_sig # %Global::unlink # $Global::start_no_new_jobs # Returns: N/A $SIG{TERM} = $Global::original_sig{TERM}; unlink keys %Global::unlink; ::status ("$Global::progname: SIGTERM received. No new jobs will be started.\n", "$Global::progname: Waiting for these ", scalar(keys %Global::running), " jobs to finish. Send SIGTERM again to stop now.\n"); list_running_jobs(); $Global::start_no_new_jobs ||= 1; } sub reaper { # A job finished. # Print the output. # Start another job # Uses: # %Global::sshmaster # %Global::running # $Global::tty_taken # @Global::slots # $opt::timeout # $Global::timeoutq # $opt::halt # $opt::keeporder # $Global::total_running # Returns: # @pids_reaped = PIDs of children finished my $stiff; my @pids_reaped; debug("run", "Reaper "); # For efficiency surround with BEGIN/COMMIT when using $opt::sql $opt::sql and $Global::sql->run("BEGIN;"); while (($stiff = waitpid(-1, &WNOHANG)) > 0) { # $stiff = pid of dead process push(@pids_reaped,$stiff); if($Global::sshmaster{$stiff}) { # This is one of the ssh -M: ignore next; } my $job = $Global::running{$stiff}; # '-a <(seq 10)' will give us a pid not in %Global::running $job or next; delete $Global::running{$stiff}; $Global::total_running--; if($job->{'commandline'}{'skip'}) { # $job->skip() was called $job->set_exitstatus(-2); $job->set_exitsignal(0); } else { $job->set_exitstatus($? >> 8); $job->set_exitsignal($? 
& 127); } debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")"); $job->set_endtime(::now()); if($stiff == $Global::tty_taken) { # The process that died had the tty => release it $Global::tty_taken = 0; } my $sshlogin = $job->sshlogin(); $sshlogin->dec_jobs_running(); $sshlogin->inc_jobs_completed(); if(not $job->should_be_retried()) { # The job is done # Free the jobslot push @Global::slots, $job->slot(); if($opt::timeout) { # Update average runtime for timeout $Global::timeoutq->update_median_runtime($job->runtime()); } if($opt::keeporder) { $job->print_earlier_jobs(); } else { $job->print(); } if($job->should_we_halt() eq "now") { # Kill children ::kill_sleep_seq($job->pid()); ::killall(); ::wait_and_exit($Global::halt_exitstatus); } } start_more_jobs(); if($opt::progress) { my %progress = progress(); ::status("\r",$progress{'status'}); } } $opt::sql and $Global::sql->run("COMMIT;"); debug("run", "done "); return @pids_reaped; } sub __USAGE__ {} sub killall { # Kill all jobs by killing their process groups $Global::start_no_new_jobs ||= 1; $Global::killall ||= 1; kill_sleep_seq(keys %Global::running); } sub kill_sleep_seq { # Send jobs TERM,TERM,KILL to processgroups # Input: # @pids = list of pids that are also processgroups # Convert pids to process groups ($processgroup = -$pid) my @pgrps = map { -$_ } @_; my @term_seq = split/,/,$opt::termseq; if(not @term_seq) { @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); } while(@term_seq) { @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps); } } sub kill_sleep { my ($signal, $sleep_max, @pids) = @_; ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n"); kill $signal, @pids; my $sleepsum = 0; my $sleep = 0.001; my @dead; while(@pids and $sleepsum < $sleep_max) { if($Global::killall) { # Killall => don't run reaper my $stiff; while (($stiff = waitpid(-1, &WNOHANG)) > 0) { # remove $stiff from @pids @pids = grep { $_ != $stiff } @pids; $sleep = $sleep/2+0.001; } } elsif(@dead = reaper()) { # Remove reaped pids for my $stiff (@dead) { @pids = grep { $_ != $stiff } @pids; } $sleep = $sleep/2+0.001; } @pids = grep { kill( 0, $_) } @pids; $sleep *= 1.1; ::usleep($sleep); $sleepsum += $sleep; # Remove dead children @pids = grep { kill( 0, $_) } @pids; } return @pids; } sub wait_and_exit { # If we do not wait, we sometimes get segfault # Returns: N/A my $error = shift; unlink keys %Global::unlink; if($error) { # Kill all jobs without printing killall(); } for (keys %Global::unkilled_children) { # Kill any (non-jobs) children kill 9, $_; waitpid($_,0); delete $Global::unkilled_children{$_}; } wait(); exit($error); } sub die_usage { # Returns: N/A usage(); wait_and_exit(255); } sub usage { # Returns: N/A print join ("\n", "Usage:", "", "$Global::progname [options] [command [arguments]] < list_of_arguments", "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", "cat ... | $Global::progname --pipe [options] [command [arguments]]", "", "-j n Run n jobs in parallel", "-k Keep same order", "-X Multiple arguments with context replace", "--colsep regexp Split input on regexp for positional replacements", "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", "", "-S sshlogin Example: foo\@server.example.com", "--slf .. 
Use ~/.parallel/sshloginfile as the list of sshlogins", "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", "--onall Run the given command with argument on all sshlogins", "--nonall Run the given command with no arguments on all sshlogins", "", "--pipe Split stdin (standard input) to multiple jobs.", "--recend str Record end separator for --pipe.", "--recstart str Record start separator for --pipe.", "", "See 'man $Global::progname' for details", "", "Academic tradition requires you to cite works you base your article on.", "When using programs that use GNU Parallel to process data for publication", "please cite:", "", " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,", " ;login: The USENIX Magazine, February 2011:42-47.", "", "This helps funding further development; AND IT WON'T COST YOU A CENT.", "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n", ""); } sub citation_notice { # if --will-cite or --plain: do nothing # if stderr redirected: do nothing # if ~/.parallel/will-cite: do nothing # else: print citation notice to stderr if($opt::willcite or $opt::plain or not -t $Global::original_stderr or -e $ENV{'HOME'}."/.parallel/will-cite") { # skip } else { ::status ("Academic tradition requires you to cite works you base your article on.\n", "When using programs that use GNU Parallel to process data for publication\n", "please cite:\n", "\n", " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,\n", " ;login: The USENIX Magazine, February 2011:42-47.\n", "\n", "This helps funding further development; AND IT WON'T COST YOU A CENT.\n", "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n", "\n", "To silence the citation notice: run 'parallel --bibtex'.\n\n", ); } } sub status { my @w = @_; my $fh = $Global::status_fd || *STDERR; print $fh @w; flush $fh; } sub warning { my @w = @_; my $prog = $Global::progname || "parallel"; status(map { ($prog, ": Warning: ", $_, "\n"); } @w); } sub error { my @w = @_; my $prog = $Global::progname || "parallel"; status(map { ($prog, ": Error: ", $_, "\n"); } @w); } sub die_bug { my $bugid = shift; print STDERR ("$Global::progname: This should not happen. You have found a bug.\n", "Please contact <parallel\@gnu.org> and include:\n", "* The version number: $Global::version\n", "* The bugid: $bugid\n", "* The command line being run\n", "* The files being read (put the files on a webserver if they are big)\n", "\n", "If you get the error on smaller/fewer files, please include those instead.\n"); ::wait_and_exit(255); } sub version { # Returns: N/A print join("\n", "GNU $Global::progname $Global::version", "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016", "Ole Tange and Free Software Foundation, Inc.", "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>", "This is free software: you are free to change and redistribute it.", "GNU $Global::progname comes with no warranty.", "", "Web site: http://www.gnu.org/software/${Global::progname}\n", "When using programs that use GNU Parallel to process data for publication", "please cite as described in 'parallel --bibtex'.\n", ); } sub bibtex { # Returns: N/A print join("\n", "Academic tradition requires you to cite works you base your article on.", "When using programs that use GNU Parallel to process data for publication", "please cite:", "", "\@article{Tange2011a,", " title = {GNU Parallel - The Command-Line Power Tool},", " author = {O. 
Tange},", " address = {Frederiksberg, Denmark},", " journal = {;login: The USENIX Magazine},", " month = {Feb},", " number = {1},", " volume = {36},", " url = {http://www.gnu.org/s/parallel},", " year = {2011},", " pages = {42-47},", " doi = {10.5281/zenodo.16303}", "}", "", "(Feel free to use \\nocite{Tange2011a})", "", "This helps funding further development; AND IT WON'T COST YOU A CENT.", "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", "", "If you send a copy of your published article to tange\@gnu.org, it will be", "mentioned in the release notes of next version of GNU Parallel.\n\n", ); while(not -e $ENV{'HOME'}."/.parallel/will-cite") { print "\nType: 'will cite' and press enter.\n> "; my $input = <STDIN>; if($input =~ /will cite/i) { mkdir $ENV{'HOME'}."/.parallel"; if(open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite")) { close $fh; print "\nThank you for your support. It is much appreciated. The citation\n", "notice is now silenced. For other ways to silence the citation notice\n", "see 'man parallel' under '--bibtex'.\n\n"; } else { print "\nThank you for your support. It is much appreciated. The citation\n", "cannot permanently be silenced. Use '--will-cite' instead.\n", "If you use '--will-cite' in scripts you are making it harder to see the\n", "citation notice. However, if you pay 10000 EUR, you should feel free\n", "to use '--will-cite'.\n\n"; last; } } } } sub show_limits { # Returns: N/A print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", "Maximal used size of command: ",Limits::Command::max_length(),"\n", "\n", "Execution of will continue now, and it will try to read its input\n", "and run commands; if this is not what you wanted to happen, please\n", "press CTRL-D or CTRL-C\n"); } sub __GENERIC_COMMON_FUNCTION__ {} sub mkdir_or_die { # If dir is not executable: die my $dir = shift; my @dir_parts = split(m:/:,$dir); my ($ddir,$part); while(defined ($part = shift @dir_parts)) { $part eq "" and next; $ddir .= "/".$part; -d $ddir and next; mkdir $ddir; } if(not -x $dir) { ::error("Cannot write to $dir: $!"); ::wait_and_exit(255); } } sub tmpfile { # Create tempfile as $TMPDIR/parXXXXX # Returns: # $filehandle = opened file handle # $filename = file name created return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); } sub tmpname { # Select a name that does not exist # Do not create the file as it may be used for creating a socket (by tmux) my $name = shift; my($tmpname); if(not -w $ENV{'TMPDIR'}) { if(not -e $ENV{'TMPDIR'}) { ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'"); } else { ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'"); } ::wait_and_exit(255); } do { $tmpname = $ENV{'TMPDIR'}."/".$name. join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); } while($Global::unlink{$tmpname}++ or -e $tmpname); return $tmpname; } sub tmpfifo { # Securely make a fifo by securely making a dir with a fifo in it use POSIX qw(mkfifo); my $tmpfifo = tmpname("fif",@_); mkfifo($tmpfifo,0600); return $tmpfifo; } sub qqx { # Like qx but with clean environment (except for $PATH) # and STDERR ignored # This is needed if the environment contains functions # that /bin/sh does not understand my $devnull = $Global::debug ? 
"" : "exec 2>/dev/null;"; return qx{ $devnull @_ }; my $PATH = $ENV{'PATH'}; local(%ENV); $ENV{'PATH'} = $PATH; return qx{ ( @_ ) 2>/dev/null }; } sub uniq { # Remove duplicates and return unique values return keys %{{ map { $_ => 1 } @_ }}; } sub min { # Returns: # Minimum value of array my $min; for (@_) { # Skip undefs defined $_ or next; defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef $min = ($min < $_) ? $min : $_; } return $min; } sub max { # Returns: # Maximum value of array my $max; for (@_) { # Skip undefs defined $_ or next; defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef $max = ($max > $_) ? $max : $_; } return $max; } sub sum { # Returns: # Sum of values of array my @args = @_; my $sum = 0; for (@args) { # Skip undefs $_ and do { $sum += $_; } } return $sum; } sub undef_as_zero { my $a = shift; return $a ? $a : 0; } sub undef_as_empty { my $a = shift; return $a ? $a : ""; } sub undef_if_empty { if(defined($_[0]) and $_[0] eq "") { return undef; } return $_[0]; } sub multiply_binary_prefix { # Evalualte numbers with binary prefix # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 # 13G = 13*1024*1024*1024 = 13958643712 # Input: # $s = string with prefixes # Returns: # $value = int with prefixes multiplied my $s = shift; if(not $s) { return $s; } $s =~ s/ki/*1024/gi; $s =~ s/mi/*1024*1024/gi; $s =~ s/gi/*1024*1024*1024/gi; $s =~ s/ti/*1024*1024*1024*1024/gi; $s =~ s/pi/*1024*1024*1024*1024*1024/gi; $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi; $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; $s =~ s/K/*1024/g; $s =~ s/M/*1024*1024/g; $s =~ s/G/*1024*1024*1024/g; $s =~ s/T/*1024*1024*1024*1024/g; $s =~ s/P/*1024*1024*1024*1024*1024/g; $s =~ s/E/*1024*1024*1024*1024*1024*1024/g; $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g; $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; $s =~ s/k/*1000/g; $s =~ s/m/*1000*1000/g; $s =~ s/g/*1000*1000*1000/g; $s =~ s/t/*1000*1000*1000*1000/g; $s =~ s/p/*1000*1000*1000*1000*1000/g; $s =~ s/e/*1000*1000*1000*1000*1000*1000/g; $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g; $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; $s = eval $s; ::debug($s); return $s; } { my ($disk_full_fh, $b8193, $error_printed); sub exit_if_disk_full { # Checks if $TMPDIR is full by writing 8kb to a tmpfile # If the disk is full: Exit immediately. 
# Returns: # N/A if(not $disk_full_fh) { my $name; ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df"); # Separate unlink due to NFS dealing badly with File::Temp unlink $name; $b8193 = "x"x8193; } # Linux does not discover if a disk is full if writing <= 8192 # Tested on: # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos # ntfs reiserfs tmpfs ubifs vfat xfs # TODO this should be tested on different OS similar to this: # # doit() { # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop # seq 100000 | parallel --tmpdir /mnt/loop/ true & # seq 6900000 > /mnt/loop/i && echo seq OK # seq 6980868 > /mnt/loop/i # seq 10000 > /mnt/loop/ii # sleep 3 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ # echo >&2 # } print $disk_full_fh $b8193; if(not $disk_full_fh or tell $disk_full_fh != 8193) { # On raspbian the disk can be full except for 10 chars. if(not $error_printed) { ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?", "Change \$TMPDIR with --tmpdir or use --compress."); $error_printed = 1; } ::wait_and_exit(255); } truncate $disk_full_fh, 0; seek($disk_full_fh, 0, 0) || die; } } sub spacefree { # Remove comments and spaces # Inputs: # $spaces = keep 1 space? # $s = string to remove spaces from # Returns: # $s = with spaces removed my $spaces = shift; my $s = shift; $s =~ s/#.*//mg; if($spaces) { $s =~ s/\s+/ /mg; } else { $s =~ s/\s//mg; } return $s; } { my $hostname; sub hostname { if(not $hostname) { $hostname = `hostname`; chomp($hostname); $hostname ||= "nohostname"; } return $hostname; } } sub which { # Input: # @programs = programs to find the path to # Returns: # @full_path = full paths to @programs. Nothing if not found my @which; for my $prg (@_) { push(@which, grep { not -d $_ and -x $_ } map { $_."/".$prg } split(":",$ENV{'PATH'})); } return @which; } { my ($regexp,%fakename); sub parent_shell { # Input: # $pid = pid to see if (grand)*parent is a shell # Returns: # $shellpath = path to shell - undef if no shell found my $pid = shift; if(not $regexp) { # All shells known to mankind # # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh my @shells = (qw(ash bash csh dash fdsh fish fizsh ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh -sh -csh -bash), '-sh (sh)' # sh on FreeBSD ); # Can be formatted as: # [sh] -sh sh busybox sh -sh (sh) # /bin/sh /sbin/sh /opt/csw/sh # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh my $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")"; $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| [^(])'; %fakename = ( # sh disguises itself as -sh (sh) on FreeBSD "-sh (sh)" => ["sh"], # csh and tcsh disguise themselves as -sh/-csh "-sh" => ["csh", "tcsh"], "-csh" => ["tcsh", "csh"], # bash disguises itself as -bash "-bash" => ["bash", "sh"], ); } my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); my $shellpath; my $testpid = $pid; while($testpid) { ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n"); if($name_of_ref->{$testpid} =~ /$regexp/o) { ::debug("init", "which ".($3||$6)." 
=> "); $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0]; ::debug("init", "shell path $shellpath\n"); $shellpath and last; } if($testpid == $parent_of_ref->{$testpid}) { # In Solaris zones, the PPID of the zsched process is itself last; } $testpid = $parent_of_ref->{$testpid}; } return $shellpath; } } { my %pid_parentpid_cmd; sub pid_table { # Returns: # %children_of = { pid -> children of pid } # %parent_of = { pid -> pid of parent } # %name_of = { pid -> commandname } if(not %pid_parentpid_cmd) { # Filter for SysV-style `ps` my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). q(s/^.{$s}//; print "@F[1,2] $_"' ); # Crazy msys: ' is not accepted on the cmd line, but " are treated as ' my $msys = q( ps -ef | perl -ane "1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). q(s/^.{$s}//; print qq{@F[1,2] $_}" ); # BSD-style `ps` my $bsd = q(ps -o pid,ppid,command -ax); %pid_parentpid_cmd = ( 'aix' => $sysv, 'cygwin' => $sysv, 'darwin' => $bsd, 'dec_osf' => $sysv, 'dragonfly' => $bsd, 'freebsd' => $bsd, 'gnu' => $sysv, 'hpux' => $sysv, 'linux' => $sysv, 'mirbsd' => $bsd, 'msys' => $msys, 'MSWin32' => $sysv, 'netbsd' => $bsd, 'nto' => $sysv, 'openbsd' => $bsd, 'solaris' => $sysv, 'svr5' => $sysv, 'syllable' => "echo ps not supported", ); } $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); my (@pidtable,%parent_of,%children_of,%name_of); # Table with pid -> children of pid @pidtable = `$pid_parentpid_cmd{$^O}`; my $p=$$; for (@pidtable) { # must match: 24436 21224 busybox ash # must match: 24436 21224 <<empty on MacOSX running cubase>> # or: perl -e 'while($0=" "){}' if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/ or $^O eq "darwin" and /^\s*(\S+)\s+(\S+)\s+()$/) { $parent_of{$1} = $2; push @{$children_of{$2}}, $1; $name_of{$1} = $3; } else { ::die_bug("pidtable format: $_"); } } return(\%children_of, \%parent_of, \%name_of); } } sub now { # Returns time since epoch as in seconds with 3 decimals # Uses: # @Global::use # Returns: # $time = time now with millisecond accuracy if(not $Global::use{"Time::HiRes"}) { if(eval "use Time::HiRes qw ( time );") { eval "sub TimeHiRestime { return Time::HiRes::time };"; } else { eval "sub TimeHiRestime { return time() };"; } $Global::use{"Time::HiRes"} = 1; } return (int(TimeHiRestime()*1000))/1000; } sub usleep { # Sleep this many milliseconds. # Input: # $ms = milliseconds to sleep my $ms = shift; ::debug(int($ms),"ms "); select(undef, undef, undef, $ms/1000); } sub reap_usleep { # Reap dead children. # If no dead children: Sleep specified amount with exponential backoff # Input: # $ms = milliseconds to sleep # Returns: # $ms/2+0.001 if children reaped # $ms*1.1 if no children reaped my $ms = shift; if(reaper()) { # Sleep exponentially shorter (1/2^n) if a job finished return $ms/2+0.001; } else { if($opt::timeout) { $Global::timeoutq->process_timeouts(); } if($opt::memfree) { kill_youngster_if_not_enough_mem(); } # When a child dies, wake up from sleep (or select(,,,)) $SIG{CHLD} = sub { kill "ALRM", $$ }; usleep($ms); # --compress needs $SIG{CHLD} undefined delete $SIG{CHLD}; exit_if_disk_full(); if($opt::linebuffer) { for my $job (values %Global::running) { $job->print(); } } # Sleep exponentially longer (1.1^n) if a job did not finish, # though at most 1000 ms. return (($ms < 1000) ? ($ms * 1.1) : ($ms)); } } sub kill_youngster_if_not_enough_mem { # Check each $sshlogin if there is enough mem. # If less than 50% enough free mem: kill off the youngest child # Put the child back in the queue. 
# Uses: # %Global::running my %jobs_of; my @sshlogins; for my $job (values %Global::running) { if(not $jobs_of{$job->sshlogin()}) { push @sshlogins, $job->sshlogin(); } push @{$jobs_of{$job->sshlogin()}}, $job; } for my $sshlogin (@sshlogins) { for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) { if($sshlogin->memfree() < $opt::memfree * 0.5) { ::debug("mem","\n",map { $_->seq()." " } (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}})); ::debug("mem","\n", $job->seq(), "killed ", $sshlogin->memfree()," < ",$opt::memfree * 0.5); $job->kill(); $sshlogin->memfree_recompute(); } else { last; } } ::debug("mem","Free mem OK ", $sshlogin->memfree()," > ",$opt::memfree * 0.5); } } sub __DEBUGGING__ {} sub debug { # Uses: # $Global::debug # %Global::fd # Returns: N/A $Global::debug or return; @_ = grep { defined $_ ? $_ : "" } @_; if($Global::debug eq "all" or $Global::debug eq $_[0]) { if($Global::fd{1}) { # Original stdout was saved my $stdout = $Global::fd{1}; print $stdout @_[1..$#_]; } else { print @_[1..$#_]; } } } sub my_memory_usage { # Returns: # memory usage if found # 0 otherwise use strict; use FileHandle; my $pid = $$; if(-e "/proc/$pid/stat") { my $fh = FileHandle->new("</proc/$pid/stat"); my $data = <$fh>; chomp $data; $fh->close; my @procinfo = split(/\s+/,$data); return undef_as_zero($procinfo[22]); } else { return 0; } } sub my_size { # Returns: # $size = size of object if Devel::Size is installed # -1 otherwise my @size_this = (@_); eval "use Devel::Size qw(size total_size)"; if ($@) { return -1; } else { return total_size(@_); } } sub my_dump { # Returns: # ascii expression of object if Data::Dump(er) is installed # error code otherwise my @dump_this = (@_); eval "use Data::Dump qw(dump);"; if ($@) { # Data::Dump not installed eval "use Data::Dumper;"; if ($@) { my $err = "Neither Data::Dump nor Data::Dumper is installed\n". "Not dumping output\n"; ::status($err); return $err; } else { return Dumper(@dump_this); } } else { # Create a dummy Data::Dump:dump as Hans Schou sometimes has # it undefined eval "sub Data::Dump:dump {}"; eval "use Data::Dump qw(dump);"; return (Data::Dump::dump(@dump_this)); } } sub my_croak { eval "use Carp; 1"; $Carp::Verbose = 1; croak(@_); } sub my_carp { eval "use Carp; 1"; $Carp::Verbose = 1; carp(@_); } sub __OBJECT_ORIENTED_PARTS__ {} package SSHLogin; sub new { my $class = shift; my $sshlogin_string = shift; my $ncpus; my %hostgroups; # SSHLogins can have these formats: # @grp+grp/ncpu//usr/bin/ssh user@server # ncpu//usr/bin/ssh user@server # /usr/bin/ssh user@server # user@server # ncpu/user@server # @grp+grp/user@server if($sshlogin_string =~ s:^\@([^/]+)/?::) { # Look for SSHLogin hostgroups %hostgroups = map { $_ => 1 } split(/\+/, $1); } if ($sshlogin_string =~ s:^(\d+)/::) { # Override default autodetected ncpus unless missing $ncpus = $1; } my $string = $sshlogin_string; # An SSHLogin is always in the hostgroup of its $string-name $hostgroups{$string} = 1; @Global::hostgroups{keys %hostgroups} = values %hostgroups; my @unget = (); my $no_slash_string = $string; $no_slash_string =~ s/[^-a-z0-9:]/_/gi; return bless { 'string' => $string, 'jobs_running' => 0, 'jobs_completed' => 0, 'maxlength' => undef, 'max_jobs_running' => undef, 'orig_max_jobs_running' => undef, 'ncpus' => $ncpus, 'hostgroups' => \%hostgroups, 'sshcommand' => undef, 'serverlogin' => undef, 'control_path_dir' => undef, 'control_path' => undef, 'time_to_login' => undef, 'last_login_at' => undef, 'loadavg_file' => $ENV{'HOME'} . 
"/.parallel/tmp/loadavg-" . $no_slash_string, 'loadavg' => undef, 'last_loadavg_update' => 0, 'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" . $no_slash_string, 'swap_activity' => undef, }, ref($class) || $class; } sub DESTROY { my $self = shift; # Remove temporary files if they are created. unlink $self->{'loadavg_file'}; unlink $self->{'swap_activity_file'}; } sub string { my $self = shift; return $self->{'string'}; } sub jobs_running { my $self = shift; return ($self->{'jobs_running'} || "0"); } sub inc_jobs_running { my $self = shift; $self->{'jobs_running'}++; } sub dec_jobs_running { my $self = shift; $self->{'jobs_running'}--; } sub set_maxlength { my $self = shift; $self->{'maxlength'} = shift; } sub maxlength { my $self = shift; return $self->{'maxlength'}; } sub jobs_completed { my $self = shift; return $self->{'jobs_completed'}; } sub in_hostgroups { # Input: # @hostgroups = the hostgroups to look for # Returns: # true if intersection of @hostgroups and the hostgroups of this # SSHLogin is non-empty my $self = shift; return grep { defined $self->{'hostgroups'}{$_} } @_; } sub hostgroups { my $self = shift; return keys %{$self->{'hostgroups'}}; } sub inc_jobs_completed { my $self = shift; $self->{'jobs_completed'}++; $Global::total_completed++; } sub set_max_jobs_running { my $self = shift; if(defined $self->{'max_jobs_running'}) { $Global::max_jobs_running -= $self->{'max_jobs_running'}; } $self->{'max_jobs_running'} = shift; if(defined $self->{'max_jobs_running'}) { # max_jobs_running could be resat if -j is a changed file $Global::max_jobs_running += $self->{'max_jobs_running'}; } # Initialize orig to the first non-zero value that comes around $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; } sub memfree { # Returns: # $memfree in bytes my $self = shift; $self->memfree_recompute(); return (not defined $self->{'memfree'} or $self->{'memfree'}) } sub memfree_recompute { my $self = shift; my $script = memfreescript(); # TODO add sshlogin and backgrounding # Run the script twice if it gives 0 (typically intermittent error) $self->{'memfree'} = ::qqx($script) || ::qqx($script); if(not $self->{'memfree'}) { ::die_bug("Less than 1 byte free"); } #::debug("mem","New free:",$self->{'memfree'}," "); } { my $script; sub memfreescript { # Returns: # shellscript for giving available memory in bytes if(not $script) { my %script_of = ( # /proc/meminfo # MemFree: 7012 kB # Buffers: 19876 kB # Cached: 431192 kB # SwapCached: 0 kB "linux" => q[ print 1024 * qx{ ]. q[ awk '/^((Swap)?Cached|MemFree|Buffers):/ ]. q[ { sum += \$2} END { print sum }' ]. q[ /proc/meminfo } ], # $ vmstat 1 1 # procs memory page faults cpu # r b w avm free re at pi po fr de sr in sy cs us sy id # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99 "hpux" => q[ print (((reverse `vmstat 1 1`)[0] ]. q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ], # $ vmstat 1 2 # kthr memory page disk faults cpu # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97 # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92 # # The second free value is correct "solaris" => q[ print (((reverse `vmstat 1 2`)[0] ]. 
q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ], "freebsd" => q{ for(qx{/sbin/sysctl -a}) { if (/^([^:]+):\s+(.+)\s*$/s) { $sysctl->{$1} = $2; } } print $sysctl->{"hw.pagesize"} * ($sysctl->{"vm.stats.vm.v_cache_count"} + $sysctl->{"vm.stats.vm.v_inactive_count"} + $sysctl->{"vm.stats.vm.v_free_count"}); }, ); my $perlscript = ""; # Make a perl script that detects the OS ($^O) and runs # the appropriate command for my $os (keys %script_of) { $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}'; } $perlscript =~ s/[\t\n ]+/ /g; $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); $script = $Global::envvar. " " .$perlscript; } return $script } } sub swapping { my $self = shift; my $swapping = $self->swap_activity(); return (not defined $swapping or $swapping) } sub swap_activity { # If the currently known swap activity is too old: # Recompute a new one in the background # Returns: # last swap activity computed my $self = shift; # Should we update the swap_activity file? my $update_swap_activity_file = 0; if(-r $self->{'swap_activity_file'}) { open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r"); my $swap_out = <$swap_fh>; close $swap_fh; if($swap_out =~ /^(\d+)$/) { $self->{'swap_activity'} = $1; ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); } ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); if(time - $self->{'last_swap_activity_update'} > 10) { # last swap activity update was started 10 seconds ago ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); $update_swap_activity_file = 1; } } else { ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); $self->{'swap_activity'} = undef; $update_swap_activity_file = 1; } if($update_swap_activity_file) { ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); $self->{'last_swap_activity_update'} = time; -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; my $swap_activity; $swap_activity = swapactivityscript(); if($self->{'string'} ne ":") { $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " . ::shell_quote_scalar($swap_activity); } # Run swap_activity measuring. 
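# The resource probes here all follow the same pattern: build a single
# 'perl -e' one-liner that branches on $^O and runs the OS-specific
# command, so the same string works locally and over ssh. A minimal
# sketch of that pattern (hypothetical probe commands, illustration only;
# the real tables are %script_of above and %vmstat below):
#
#   my %cmd_of = ( 'linux'   => q{print "linux probe"},
#                  'freebsd' => q{print "freebsd probe"} );
#   my $oneliner = "";
#   for my $os (keys %cmd_of) {
#       $oneliner .= 'if($^O eq "'.$os.'") { '.$cmd_of{$os}.' }';
#   }
#   my $script = "perl -e " . ::shell_quote_scalar($oneliner);
#   # run with ::qqx($script) locally, or prefix "$sshcmd $serverlogin"
#   # to run the same probe on a remote host.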
# As the command can take long to run if run remote # save it to a tmp file before moving it to the correct file my $file = $self->{'swap_activity_file'}; my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); ::debug("swap", "\n", $swap_activity, "\n"); ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) &"); } return $self->{'swap_activity'}; } { my $script; sub swapactivityscript { # Returns: # shellscript for detecting swap activity # # arguments for vmstat are OS dependant # swap_in and swap_out are in different columns depending on OS # if(not $script) { my %vmstat = ( # linux: $7*$8 # $ vmstat 1 2 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- # r b swpd free buff cache si so bi bo in cs us sy id wa # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], # solaris: $6*$7 # $ vmstat -S 1 2 # kthr memory page disk faults cpu # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], # darwin (macosx): $21*$22 # $ vm_stat -c 2 1 # Mach Virtual Memory Statistics: (page size of 4096 bytes) # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], # ultrix: $12*$13 # $ vmstat -S 1 2 # procs faults cpu memory page disk # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], # aix: $6*$7 # $ vmstat 1 2 # System configuration: lcpu=1 mem=2048MB # # kthr memory page faults cpu # ----- ----------- ------------------------ ------------ ----------- # r b avm fre re pi po fr sr cy in sy cs us sy id wa # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], # freebsd: $8*$9 # $ vmstat -H 1 2 # procs memory page disks faults cpu # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], # mirbsd: $8*$9 # $ vmstat 1 2 # procs memory page disks traps cpu # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], # netbsd: $7*$8 # $ vmstat 1 2 # procs memory page disks faults cpu # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], # openbsd: $8*$9 # $ vmstat 1 2 # procs memory page disks traps cpu # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 # 0 0 0 76604 109936 24 0 0 0 0 
0 0 0 7 114 20 0 1 99 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], # hpux: $8*$9 # $ vmstat 1 2 # procs memory page faults cpu # r b w avm free re at pi po fr de sr in sy cs us sy id # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], # dec_osf (tru64): $11*$12 # $ vmstat 1 2 # Virtual Memory Statistics: (pagesize = 8192) # procs memory pages intr cpu # r w u act free wire fault cow zero react pin pout in sy cs us sy id # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], # gnu (hurd): $7*$8 # $ vmstat -k 1 2 # (pagesize: 4, size: 512288, swap size: 894972) # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], # -nto (qnx has no swap) #-irix #-svr5 (scosysv) ); my $perlscript = ""; # Make a perl script that detects the OS ($^O) and runs # the appropriate vmstat command for my $os (keys %vmstat) { $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . $vmstat{$os}[1] . '}"` }'; } $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); $script = $Global::envvar. " " .$perlscript; } return $script; } } sub too_fast_remote_login { my $self = shift; if($self->{'last_login_at'} and $self->{'time_to_login'}) { # sshd normally allows 10 simultaneous logins # A login takes time_to_login # So time_to_login/5 should be safe # If now <= last_login + time_to_login/5: Then it is too soon. my $too_fast = (::now() <= $self->{'last_login_at'} + $self->{'time_to_login'}/5); ::debug("run", "Too fast? 
$too_fast "); return $too_fast; } else { # No logins so far (or time_to_login not computed): it is not too fast return 0; } } sub last_login_at { my $self = shift; return $self->{'last_login_at'}; } sub set_last_login_at { my $self = shift; $self->{'last_login_at'} = shift; } sub loadavg_too_high { my $self = shift; my $loadavg = $self->loadavg(); return (not defined $loadavg or $loadavg > $self->max_loadavg()); } { my $cmd; sub loadavg_cmd { if(not $cmd) { # aix => "ps -ae -o state,command" # state wrong # bsd => "ps ax -o state,command" # sysv => "ps -ef -o s -o comm" # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \ # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | # awk '{print $2,$1}' # dec_osf => bsd # dragonfly => bsd # freebsd => bsd # gnu => bsd # hpux => ps -el|awk '{print $2,$14,$15}' # irix => ps -ef -o state -o comm # linux => bsd # minix => ps el|awk '{print \$1,\$11}' # mirbsd => bsd # netbsd => bsd # openbsd => bsd # solaris => sysv # svr5 => sysv # ultrix => ps -ax | awk '{print $3,$5}' # unixware => ps -el|awk '{print $2,$14,$15}' my $ps = ::spacefree(1,q{ $sysv="ps -ef -o s -o comm"; $sysv2="ps -ef -o state -o comm"; $bsd="ps ax -o state,command"; # Treat threads as processes $bsd2="ps axH -o state,command"; $psel="ps -el|awk '{ print \$2,\$14,\$15 }'"; $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n"; /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | awk '{print $2,$1}' }; $dummy="echo S COMMAND;echo R dummy"; %ps=( 'aix' => "uptime", 'cygwin' => $cygwin, 'darwin' => $bsd, 'dec_osf' => $sysv2, 'dragonfly' => $bsd, 'freebsd' => $bsd2, 'gnu' => $bsd, 'hpux' => $psel, 'irix' => $sysv2, 'linux' => $bsd2, 'minix' => "ps el|awk '{print \$1,\$11}'", 'mirbsd' => $bsd, 'msys' => $sysv, 'MSWin32' => $sysv, 'netbsd' => $bsd, 'nto' => $dummy, 'openbsd' => $bsd, 'solaris' => $sysv, 'svr5' => $psel, 'ultrix' => "ps -ax | awk '{print \$3,\$5}'", ); print `$ps{$^O}`; }); $cmd = "perl -e ".::shell_quote_scalar($ps); } return $cmd; } } sub loadavg { # If the currently know loadavg is too old: # Recompute a new one in the background # The load average is computed as the number of processes waiting for disk # or CPU right now. So it is the server load this instant and not averaged over # several minutes. This is needed so GNU Parallel will at most start one job # that will push the load over the limit. # # Returns: # $last_loadavg = last load average computed (undef if none) my $self = shift; # Should we update the loadavg file? my $update_loadavg_file = 0; if(open(my $load_fh, "<", $self->{'loadavg_file'})) { local $/; # $/ = undef => slurp whole file my $load_out = <$load_fh>; close $load_fh; # Count lines starting with D,O,R but command does not start with [ my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm); if($load > 0) { # load is overestimated by 1 $self->{'loadavg'} = $load - 1; ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n"); } elsif ($load_out=~/average: (\d+.\d+)/) { # AIX does not support instant load average # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55 $self->{'loadavg'} = $1; } else { ::die_bug("loadavg_invalid_content: " . $self->{'loadavg_file'} . 
"\n$load_out"); } $update_loadavg_file = 1; } else { ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); $self->{'loadavg'} = undef; $update_loadavg_file = 1; } if($update_loadavg_file) { ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); $self->{'last_loadavg_update'} = time; -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; my $cmd = ""; if($self->{'string'} ne ":") { $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " . ::shell_quote_scalar(loadavg_cmd()); } else { $cmd .= loadavg_cmd(); } # As the command can take long to run if run remote # save it to a tmp file before moving it to the correct file ::debug("load", "Cmd: ", $cmd); my $file = $self->{'loadavg_file'}; my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa"); ::qqx(" ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & "); } return $self->{'loadavg'}; } sub max_loadavg { my $self = shift; # If --load is a file it might be changed if($Global::max_load_file) { my $mtime = (stat($Global::max_load_file))[9]; if($mtime > $Global::max_load_file_last_mod) { $Global::max_load_file_last_mod = $mtime; for my $sshlogin (values %Global::host) { $sshlogin->set_max_loadavg(undef); } } } if(not defined $self->{'max_loadavg'}) { $self->{'max_loadavg'} = $self->compute_max_loadavg($opt::load); } ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); return $self->{'max_loadavg'}; } sub set_max_loadavg { my $self = shift; $self->{'max_loadavg'} = shift; } sub compute_max_loadavg { # Parse the max loadaverage that the user asked for using --load # Returns: # max loadaverage my $self = shift; my $loadspec = shift; my $load; if(defined $loadspec) { if($loadspec =~ /^\+(\d+)$/) { # E.g. --load +2 my $j = $1; $load = $self->ncpus() + $j; } elsif ($loadspec =~ /^-(\d+)$/) { # E.g. 
--load -2 my $j = $1; $load = $self->ncpus() - $j; } elsif ($loadspec =~ /^(\d+)\%$/) { my $j = $1; $load = $self->ncpus() * $j / 100; } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { $load = $1; } elsif (-f $loadspec) { $Global::max_load_file = $loadspec; $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; if(open(my $in_fh, "<", $Global::max_load_file)) { my $opt_load_file = join("",<$in_fh>); close $in_fh; $load = $self->compute_max_loadavg($opt_load_file); } else { ::error("Cannot open $loadspec."); ::wait_and_exit(255); } } else { ::error("Parsing of --load failed."); ::die_usage(); } if($load < 0.01) { $load = 0.01; } } return $load; } sub time_to_login { my $self = shift; return $self->{'time_to_login'}; } sub set_time_to_login { my $self = shift; $self->{'time_to_login'} = shift; } sub max_jobs_running { my $self = shift; if(not defined $self->{'max_jobs_running'}) { my $nproc = $self->compute_number_of_processes($opt::jobs); $self->set_max_jobs_running($nproc); } return $self->{'max_jobs_running'}; } sub orig_max_jobs_running { my $self = shift; return $self->{'orig_max_jobs_running'}; } sub compute_number_of_processes { # Number of processes wanted and limited by system resources # Returns: # Number of processes my $self = shift; my $opt_P = shift; my $wanted_processes = $self->user_requested_processes($opt_P); if(not defined $wanted_processes) { $wanted_processes = $Global::default_simultaneous_sshlogins; } ::debug("load", "Wanted procs: $wanted_processes\n"); my $system_limit = $self->processes_available_by_system_limit($wanted_processes); ::debug("load", "Limited to procs: $system_limit\n"); return $system_limit; } { my @children; my $max_system_proc_reached; my $more_filehandles; my %fh; my $tmpfhname; my $count_jobs_already_read; my @jobs; my $job; my @args; my $arg; sub reserve_filehandles { # Reserves filehandle my $n = shift; for (1..$n) { $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null"); } } sub reserve_process { # Spawn a dummy process my $child; if($child = fork()) { push @children, $child; $Global::unkilled_children{$child} = 1; } elsif(defined $child) { # This is the child # The child takes one process slot # It will be killed later $SIG{'TERM'} = $Global::original_sig{'TERM'}; sleep 10101010; exit(0); } else { # Failed to spawn $max_system_proc_reached = 1; } } sub get_args_or_jobs { # Get an arg or a job (depending on mode) if($Global::semaphore or $opt::pipe) { # Skip: No need to get args return 1; } elsif(defined $opt::retries and $count_jobs_already_read) { # For retries we may need to run all jobs on this sshlogin # so include the already read jobs for this sshlogin $count_jobs_already_read--; return 1; } else { if($opt::X or $opt::m) { # The arguments may have to be re-spread over several jobslots # So pessimistically only read one arg per jobslot # instead of a full commandline if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { if($Global::JobQueue->empty()) { return 0; } else { $job = $Global::JobQueue->get(); push(@jobs, $job); return 1; } } else { $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); push(@args, $arg); return 1; } } else { # If there are no more command lines, then we have a process # per command line, so no need to go further if($Global::JobQueue->empty()) { return 0; } else { $job = $Global::JobQueue->get(); push(@jobs, $job); return 1; } } } } sub cleanup { # Cleanup: Close the files for (values %fh) { close $_ } # Cleanup: Kill the children for my $pid (@children) { kill 9, 
$pid; waitpid($pid,0); delete $Global::unkilled_children{$pid}; } # Cleanup: Unget the command_lines or the @args $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args); @args = (); $Global::JobQueue->unget(@jobs); @jobs = (); } sub processes_available_by_system_limit { # If the wanted number of processes is bigger than the system limits: # Limit them to the system limits # Limits are: File handles, number of input lines, processes, # and taking > 1 second to spawn 10 extra processes # Returns: # Number of processes my $self = shift; my $wanted_processes = shift; my $system_limit = 0; my $slow_spawining_warning_printed = 0; my $time = time; $more_filehandles = 1; $tmpfhname = "TmpFhNamE"; # perl uses 7 filehandles for something? # parallel uses 1 for memory_usage # parallel uses 4 for ? reserve_filehandles(12); # Two processes for load avg and ? reserve_process(); reserve_process(); # For --retries count also jobs already run $count_jobs_already_read = $Global::JobQueue->next_seq(); my $wait_time_for_getting_args = 0; my $start_time = time; while(1) { $system_limit >= $wanted_processes and last; not $more_filehandles and last; $max_system_proc_reached and last; my $before_getting_arg = time; if(!$opt::roundrobin) { get_args_or_jobs() or last; } $wait_time_for_getting_args += time - $before_getting_arg; $system_limit++; # Every simultaneous process uses 2 filehandles to write to # and 2 filehandles to read from reserve_filehandles(4); # System process limit reserve_process(); my $forktime = time - $time - $wait_time_for_getting_args; ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ", $forktime, " (processes so far: ", $system_limit,")\n"); if($system_limit > 10 and $forktime > 1 and $forktime > $system_limit * 0.01 and not $slow_spawining_warning_printed) { # It took more than 0.01 second to fork a processes on avg. # Give the user a warning. He can press Ctrl-C if this # sucks. ::warning("Starting $system_limit processes took > $forktime sec.", "Consider adjusting -j. Press CTRL-C to stop."); $slow_spawining_warning_printed = 1; } } cleanup(); if($system_limit < $wanted_processes) { # The system_limit is less than the wanted_processes if($system_limit < 1 and not $Global::JobQueue->empty()) { ::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf", "or /proc/sys/kernel/pid_max may help."); ::wait_and_exit(255); } if(not $more_filehandles) { ::warning("Only enough file handles to run ". $system_limit. " jobs in parallel.", "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or ", "raising ulimit -n or /etc/security/limits.conf may help."); } if($max_system_proc_reached) { ::warning("Only enough available processes to run ". $system_limit. 
" jobs in parallel.", "Raising ulimit -u or /etc/security/limits.conf ", "or /proc/sys/kernel/pid_max may help."); } } if($] == 5.008008 and $system_limit > 1000) { # https://savannah.gnu.org/bugs/?36942 $system_limit = 1000; } if($Global::JobQueue->empty()) { $system_limit ||= 1; } if($self->string() ne ":" and $system_limit > $Global::default_simultaneous_sshlogins) { $system_limit = $self->simultaneous_sshlogin_limit($system_limit); } return $system_limit; } } sub simultaneous_sshlogin_limit { # Test by logging in wanted number of times simultaneously # Returns: # min($wanted_processes,$working_simultaneous_ssh_logins-1) my $self = shift; my $wanted_processes = shift; if($self->{'time_to_login'}) { return $wanted_processes; } # Try twice because it guesses wrong sometimes # Choose the minimal my $ssh_limit = ::min($self->simultaneous_sshlogin($wanted_processes), $self->simultaneous_sshlogin($wanted_processes)); if($ssh_limit < $wanted_processes) { my $serverlogin = $self->serverlogin(); ::warning("ssh to $serverlogin only allows ". "for $ssh_limit simultaneous logins.", "You may raise this by changing ". "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.", "Using only ".($ssh_limit-1)." connections ". "to avoid race conditions."); } # Race condition can cause problem if using all sshs. if($ssh_limit > 1) { $ssh_limit -= 1; } return $ssh_limit; } sub simultaneous_sshlogin { # Using $sshlogin try to see if we can do $wanted_processes # simultaneous logins # (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l # Returns: # Number of succesful logins my $self = shift; my $wanted_processes = shift; my $sshcmd = $self->sshcommand(); my $serverlogin = $self->serverlogin(); my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; # TODO sh -c wrapper to work for csh my $cmd = "$sshdelay$sshcmd $serverlogin -- echo simultaneouslogin </dev/null 2>&1 &"x$wanted_processes; ::debug("init", "Trying $wanted_processes logins at $serverlogin\n"); open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or ::die_bug("simultaneouslogin"); my $ssh_limit = <$simul_fh>; close $simul_fh; chomp $ssh_limit; return $ssh_limit; } sub set_ncpus { my $self = shift; $self->{'ncpus'} = shift; } sub user_requested_processes { # Parse the number of processes that the user asked for using -j # Returns: # the number of processes to run on this sshlogin my $self = shift; my $opt_P = shift; my $processes; if(defined $opt_P) { if($opt_P =~ /^\+(\d+)$/) { # E.g. -P +2 my $j = $1; $processes = $self->ncpus() + $j; } elsif ($opt_P =~ /^-(\d+)$/) { # E.g. -P -2 my $j = $1; $processes = $self->ncpus() - $j; } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { # E.g. 
-P 10.5% my $j = $1; $processes = $self->ncpus() * $j / 100; } elsif ($opt_P =~ /^(\d+)$/) { $processes = $1; if($processes == 0) { # -P 0 = infinity (or at least close) $processes = $Global::infinity; } } elsif (-f $opt_P) { $Global::max_procs_file = $opt_P; if(open(my $in_fh, "<", $Global::max_procs_file)) { my $opt_P_file = join("",<$in_fh>); close $in_fh; $processes = $self->user_requested_processes($opt_P_file); } else { ::error("Cannot open $opt_P."); ::wait_and_exit(255); } } else { ::error("Parsing of --jobs/-j/--max-procs/-P failed."); ::die_usage(); } $processes = ::ceil($processes); } return $processes; } sub ncpus { my $self = shift; if(not defined $self->{'ncpus'}) { my $sshcmd = $self->sshcommand(); my $serverlogin = $self->serverlogin(); if($serverlogin eq ":") { if($opt::use_cpus_instead_of_cores) { $self->{'ncpus'} = no_of_cpus(); } else { $self->{'ncpus'} = no_of_cores(); } } else { my $ncpu; my $sqe = ::shell_quote_scalar($Global::envvar); if($opt::use_cpus_instead_of_cores) { $ncpu = ::qqx("echo|$sshcmd $serverlogin -- $sqe parallel --number-of-cpus"); } else { ::debug("init",qq(echo|$sshcmd $serverlogin -- $sqe parallel --number-of-cores\n)); $ncpu = ::qqx("echo|$sshcmd $serverlogin -- $sqe parallel --number-of-cores"); } chomp $ncpu; if($ncpu =~ /^\s*[0-9]+\s*$/s) { $self->{'ncpus'} = $ncpu; } else { ::warning("Could not figure out ". "number of cpus on $serverlogin ($ncpu). Using 1."); $self->{'ncpus'} = 1; } } } return $self->{'ncpus'}; } sub no_of_cpus { # Returns: # Number of physical CPUs local $/ = "\n"; # If delimiter is set, then $/ will be wrong my $no_of_cpus; if ($^O eq 'linux') { $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux(); } elsif ($^O eq 'freebsd') { $no_of_cpus = no_of_cpus_freebsd(); } elsif ($^O eq 'netbsd') { $no_of_cpus = no_of_cpus_netbsd(); } elsif ($^O eq 'openbsd') { $no_of_cpus = no_of_cpus_openbsd(); } elsif ($^O eq 'gnu') { $no_of_cpus = no_of_cpus_hurd(); } elsif ($^O eq 'darwin') { $no_of_cpus = no_of_cpus_darwin(); } elsif ($^O eq 'solaris') { $no_of_cpus = no_of_cpus_solaris(); } elsif ($^O eq 'aix') { $no_of_cpus = no_of_cpus_aix(); } elsif ($^O eq 'hpux') { $no_of_cpus = no_of_cpus_hpux(); } elsif ($^O eq 'nto') { $no_of_cpus = no_of_cpus_qnx(); } elsif ($^O eq 'svr5') { $no_of_cpus = no_of_cpus_openserver(); } elsif ($^O eq 'irix') { $no_of_cpus = no_of_cpus_irix(); } elsif ($^O eq 'dec_osf') { $no_of_cpus = no_of_cpus_tru64(); } else { $no_of_cpus = (no_of_cpus_gnu_linux() || no_of_cpus_freebsd() || no_of_cpus_netbsd() || no_of_cpus_openbsd() || no_of_cpus_hurd() || no_of_cpus_darwin() || no_of_cpus_solaris() || no_of_cpus_aix() || no_of_cpus_hpux() || no_of_cpus_qnx() || no_of_cpus_openserver() || no_of_cpus_irix() || no_of_cpus_tru64() # Number of cores is better than no guess for #CPUs || nproc() ); } if($no_of_cpus) { chomp $no_of_cpus; return $no_of_cpus; } else { ::warning("Cannot figure out number of cpus. 
Using 1."); return 1; } } sub no_of_cores { # Returns: # Number of CPU cores local $/ = "\n"; # If delimiter is set, then $/ will be wrong my $no_of_cores; if ($^O eq 'linux') { $no_of_cores = no_of_cores_gnu_linux(); } elsif ($^O eq 'freebsd') { $no_of_cores = no_of_cores_freebsd(); } elsif ($^O eq 'netbsd') { $no_of_cores = no_of_cores_netbsd(); } elsif ($^O eq 'openbsd') { $no_of_cores = no_of_cores_openbsd(); } elsif ($^O eq 'gnu') { $no_of_cores = no_of_cores_hurd(); } elsif ($^O eq 'darwin') { $no_of_cores = no_of_cores_darwin(); } elsif ($^O eq 'solaris') { $no_of_cores = no_of_cores_solaris(); } elsif ($^O eq 'aix') { $no_of_cores = no_of_cores_aix(); } elsif ($^O eq 'hpux') { $no_of_cores = no_of_cores_hpux(); } elsif ($^O eq 'nto') { $no_of_cores = no_of_cores_qnx(); } elsif ($^O eq 'svr5') { $no_of_cores = no_of_cores_openserver(); } elsif ($^O eq 'irix') { $no_of_cores = no_of_cores_irix(); } elsif ($^O eq 'dec_osf') { $no_of_cores = no_of_cores_tru64(); } else { $no_of_cores = (no_of_cores_gnu_linux() || no_of_cores_freebsd() || no_of_cores_netbsd() || no_of_cores_openbsd() || no_of_cores_hurd() || no_of_cores_darwin() || no_of_cores_solaris() || no_of_cores_aix() || no_of_cores_hpux() || no_of_cores_qnx() || no_of_cores_openserver() || no_of_cores_irix() || no_of_cores_tru64() || nproc() ); } if($no_of_cores) { chomp $no_of_cores; return $no_of_cores; } else { ::warning("Cannot figure out number of CPU cores. Using 1."); return 1; } } sub nproc { # Returns: # Number of cores using `nproc` my $no_of_cores = ::qqx("nproc"); return $no_of_cores; } sub no_of_cpus_gnu_linux { # Returns: # Number of physical CPUs on GNU/Linux # undef if not GNU/Linux my $no_of_cpus; my $no_of_cores; my $no_of_active_cores; if(-e "/proc/cpuinfo") { $no_of_cpus = 0; $no_of_cores = 0; my %seen; if(open(my $in_fh, "<", "/proc/cpuinfo")) { while(<$in_fh>) { if(/^physical id.*[:](.*)/ and not $seen{$1}++) { $no_of_cpus++; } /^processor.*[:]/i and $no_of_cores++; } close $in_fh; } } if(-e "/proc/self/status") { # if 'taskset' is used to limit number of cores if(open(my $in_fh, "<", "/proc/self/status")) { while(<$in_fh>) { if(/^Cpus_allowed:\s*(\S+)/) { my $a = $1; $a =~ tr/,//d; $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); } } close $in_fh; } } return (::min($no_of_cpus || $no_of_cores,$no_of_active_cores)); } sub no_of_cores_gnu_linux { # Returns: # Number of CPU cores on GNU/Linux # undef if not GNU/Linux my $no_of_cores; my $no_of_active_cores; if(-e "/proc/cpuinfo") { $no_of_cores = 0; open(my $in_fh, "<", "/proc/cpuinfo") || return undef; while(<$in_fh>) { /^processor.*[:]/i and $no_of_cores++; } close $in_fh; } if(-e "/proc/self/status") { # if 'taskset' is used to limit number of cores if(open(my $in_fh, "<", "/proc/self/status")) { while(<$in_fh>) { if(/^Cpus_allowed:\s*(\S+)/) { my $a = $1; $a =~ tr/,//d; $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); } } close $in_fh; } } return (::min($no_of_cores,$no_of_active_cores)); } sub no_of_cpus_freebsd { # Returns: # Number of physical CPUs on FreeBSD # undef if not FreeBSD my $no_of_cpus = (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }) or ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })); chomp $no_of_cpus; return $no_of_cpus; } sub no_of_cores_freebsd { # Returns: # Number of CPU cores on FreeBSD # undef if not FreeBSD my $no_of_cores = (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }) or ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l 
| awk '{ print \$1 }' })); chomp $no_of_cores; return $no_of_cores; } sub no_of_cpus_netbsd { # Returns: # Number of physical CPUs on NetBSD # undef if not NetBSD my $no_of_cpus = ::qqx("sysctl -n hw.ncpu"); chomp $no_of_cpus; return $no_of_cpus; } sub no_of_cores_netbsd { # Returns: # Number of CPU cores on NetBSD # undef if not NetBSD my $no_of_cores = ::qqx("sysctl -n hw.ncpu"); chomp $no_of_cores; return $no_of_cores; } sub no_of_cpus_openbsd { # Returns: # Number of physical CPUs on OpenBSD # undef if not OpenBSD my $no_of_cpus = ::qqx('sysctl -n hw.ncpu'); chomp $no_of_cpus; return $no_of_cpus; } sub no_of_cores_openbsd { # Returns: # Number of CPU cores on OpenBSD # undef if not OpenBSD my $no_of_cores = ::qqx('sysctl -n hw.ncpu'); chomp $no_of_cores; return $no_of_cores; } sub no_of_cpus_hurd { # Returns: # Number of physical CPUs on HURD # undef if not HURD my $no_of_cpus = ::qqx("nproc"); chomp $no_of_cpus; return $no_of_cpus; } sub no_of_cores_hurd { # Returns: # Number of physical CPUs on HURD # undef if not HURD my $no_of_cores = ::qqx("nproc"); chomp $no_of_cores; return $no_of_cores; } sub no_of_cpus_darwin { # Returns: # Number of physical CPUs on MacOSX Darwin # undef if not MacOSX Darwin my $no_of_cpus = (::qqx('sysctl -n hw.physicalcpu') or ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' })); return $no_of_cpus; } sub no_of_cores_darwin { # Returns: # Number of CPU cores on Mac Darwin # undef if not Mac Darwin my $no_of_cores = (::qqx('sysctl -n hw.logicalcpu') or ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' })); return $no_of_cores; } sub no_of_cpus_solaris { # Returns: # Number of physical CPUs on Solaris # undef if not Solaris if(-x "/usr/sbin/psrinfo") { my @psrinfo = ::qqx("/usr/sbin/psrinfo"); if($#psrinfo >= 0) { return $#psrinfo +1; } } if(-x "/usr/sbin/prtconf") { my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance"); if($#prtconf >= 0) { return $#prtconf +1; } } return undef; } sub no_of_cores_solaris { # Returns: # Number of CPU cores on Solaris # undef if not Solaris if(-x "/usr/sbin/psrinfo") { my @psrinfo = ::qqx("/usr/sbin/psrinfo"); if($#psrinfo >= 0) { return $#psrinfo +1; } } if(-x "/usr/sbin/prtconf") { my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance"); if($#prtconf >= 0) { return $#prtconf +1; } } return undef; } sub no_of_cpus_aix { # Returns: # Number of physical CPUs on AIX # undef if not AIX my $no_of_cpus = 0; if(-x "/usr/sbin/lscfg") { open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '") || return undef; $no_of_cpus = <$in_fh>; chomp ($no_of_cpus); close $in_fh; } return $no_of_cpus; } sub no_of_cores_aix { # Returns: # Number of CPU cores on AIX # undef if not AIX my $no_of_cores; if(-x "/usr/bin/vmstat") { open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef; while(<$in_fh>) { /lcpu=([0-9]*) / and $no_of_cores = $1; } close $in_fh; } return $no_of_cores; } sub no_of_cpus_hpux { # Returns: # Number of physical CPUs on HP-UX # undef if not HP-UX my $no_of_cpus = ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'}); return $no_of_cpus; } sub no_of_cores_hpux { # Returns: # Number of CPU cores on HP-UX # undef if not HP-UX my $no_of_cores = ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1\n"'}); return $no_of_cores; } sub no_of_cpus_qnx { # Returns: # Number of physical CPUs on QNX # undef if not QNX # BUG: It is not known how to calculate this. 
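# Worked example (illustration only) for the Cpus_allowed handling in
# no_of_cpus_gnu_linux()/no_of_cores_gnu_linux() above: /proc/self/status
# reports the allowed CPUs as a hex mask, and the number of set bits is
# the number of cores the process may actually use:
#
#   my $mask = "ff";                     # hypothetical taskset mask
#   $mask =~ tr/,//d;                    # strip "ffffffff,000000ff" commas
#   my $active = unpack("%32b*", pack("H*", $mask));
#   # "ff" => 8 usable cores; "f" => 4 usable cores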
my $no_of_cpus = 0; return $no_of_cpus; } sub no_of_cores_qnx { # Returns: # Number of CPU cores on QNX # undef if not QNX # BUG: It is not known how to calculate this. my $no_of_cores = 0; return $no_of_cores; } sub no_of_cpus_openserver { # Returns: # Number of physical CPUs on SCO OpenServer # undef if not SCO OpenServer my $no_of_cpus = 0; if(-x "/usr/sbin/psrinfo") { my @psrinfo = ::qqx("/usr/sbin/psrinfo"); if($#psrinfo >= 0) { return $#psrinfo +1; } } return $no_of_cpus; } sub no_of_cores_openserver { # Returns: # Number of CPU cores on SCO OpenServer # undef if not SCO OpenServer my $no_of_cores = 0; if(-x "/usr/sbin/psrinfo") { my @psrinfo = ::qqx("/usr/sbin/psrinfo"); if($#psrinfo >= 0) { return $#psrinfo +1; } } return $no_of_cores; } sub no_of_cpus_irix { # Returns: # Number of physical CPUs on IRIX # undef if not IRIX my $no_of_cpus = ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' }); return $no_of_cpus; } sub no_of_cores_irix { # Returns: # Number of CPU cores on IRIX # undef if not IRIX my $no_of_cores = ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' }); return $no_of_cores; } sub no_of_cpus_tru64 { # Returns: # Number of physical CPUs on Tru64 # undef if not Tru64 my $no_of_cpus = ::qqx("sizer -pr"); return $no_of_cpus; } sub no_of_cores_tru64 { # Returns: # Number of CPU cores on Tru64 # undef if not Tru64 my $no_of_cores = ::qqx("sizer -pr"); return $no_of_cores; } sub sshcommand { my $self = shift; if (not defined $self->{'sshcommand'}) { $self->sshcommand_of_sshlogin(); } return $self->{'sshcommand'}; } sub serverlogin { my $self = shift; if (not defined $self->{'serverlogin'}) { $self->sshcommand_of_sshlogin(); } return $self->{'serverlogin'}; } sub sshcommand_of_sshlogin { # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server') # 'user@server' -> ('ssh','user@server') # 'myssh user@server' -> ('myssh','user@server') # 'myssh -l user server' -> ('myssh -l user','server') # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server') # Returns: # sshcommand - defaults to 'ssh' # login@host my $self = shift; my ($sshcmd, $serverlogin); # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh' $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh"; if($self->{'string'} =~ /(.+) (\S+)$/) { # Own ssh command $sshcmd = $1; $serverlogin = $2; } else { # Normal ssh if($opt::controlmaster) { # Use control_path to make ssh faster my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; $sshcmd = $opt::ssh." -S ".$control_path; $serverlogin = $self->{'string'}; if(not $self->{'control_path'}{$control_path}++) { # Master is not running for this control_path # Start it my $pid = fork(); if($pid) { $Global::sshmaster{$pid} ||= 1; } else { $SIG{'TERM'} = undef; # Ignore the 'foo' being printed open(STDOUT,">","/dev/null"); # STDERR >/dev/null to ignore open(STDERR,">","/dev/null"); open(STDIN,"<","/dev/null"); # Run a sleep that outputs data, so it will discover # if the ssh connection closes. my $sleep = ::shell_quote_scalar ('$|=1;while(1){sleep 1;print "foo\n"}'); my @master = ($opt::ssh, "-MTS", $control_path, $serverlogin, "--", "perl", "-e", $sleep); exec(@master); } } } else { $sshcmd = $opt::ssh; $serverlogin = $self->{'string'}; } } if($serverlogin =~ s/(\S+)\@(\S+)/$2/) { # convert user@server to '-l user server' # because lsh does not support user@server $sshcmd = $sshcmd." 
-l ".$1; } $self->{'sshcommand'} = $sshcmd; $self->{'serverlogin'} = $serverlogin; } sub control_path_dir { # Returns: # path to directory my $self = shift; if(not defined $self->{'control_path_dir'}) { $self->{'control_path_dir'} = # Use $ENV{'TMPDIR'} as that is typically not # NFS mounted File::Temp::tempdir($ENV{'TMPDIR'} . "/control_path_dir-XXXX", CLEANUP => 1); } return $self->{'control_path_dir'}; } sub rsync_transfer_cmd { # Command to run to transfer a file # Input: # $file = filename of file to transfer # $workdir = destination dir # Returns: # $cmd = rsync command to run to transfer $file ("" if unreadable) my $self = shift; my $file = shift; my $workdir = shift; if(not -r $file) { ::warning($file. " is not readable and will not be transferred."); return "true"; } my $rsync_destdir; if($file =~ m:^/:) { # rsync /foo/bar / $rsync_destdir = "/"; } else { $rsync_destdir = ::shell_quote_file($workdir); } $file = ::shell_quote_file($file); my $sshcmd = $self->sshcommand(); my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd); my $serverlogin = $self->serverlogin(); # Make dir if it does not exist return "( $sshcmd $serverlogin -- mkdir -p $rsync_destdir;" . rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )"; } sub cleanup_cmd { # Command to run to remove the remote file # Input: # $file = filename to remove # $workdir = destination dir # Returns: # $cmd = ssh command to run to remove $file and empty parent dirs my $self = shift; my $file = shift; my $workdir = shift; my $f = $file; if($f =~ m:/\./:) { # foo/bar/./baz/quux => workdir/baz/quux # /foo/bar/./baz/quux => workdir/baz/quux $f =~ s:.*/\./:$workdir/:; } elsif($f =~ m:^[^/]:) { # foo/bar => workdir/foo/bar $f = $workdir."/".$f; } my @subdirs = split m:/:, ::dirname($f); my @rmdir; my $dir = ""; for(@subdirs) { $dir .= $_."/"; unshift @rmdir, ::shell_quote_file($dir); } my $rmdir = @rmdir ? "sh -c 'rmdir @rmdir 2>/dev/null';" : ""; if(defined $opt::workdir and $opt::workdir eq "...") { $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; } $f = ::shell_quote_file($f); my $sshcmd = $self->sshcommand(); my $serverlogin = $self->serverlogin(); return "$sshcmd $serverlogin -- ".::shell_quote_scalar("(rm -f $f; $rmdir)"); } { my $rsync; sub rsync { # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. 
# If the version >= 3.1.0: downgrade to protocol 30 if(not $rsync) { my @out = `rsync --version`; for (@out) { if(/version (\d+.\d+)(.\d+)?/) { if($1 >= 3.1) { # Version 3.1.0 or later: Downgrade to protocol 30 $rsync = "rsync --protocol 30"; } else { $rsync = "rsync"; } } } $rsync or ::die_bug("Cannot figure out version of rsync: @out"); } return $rsync; } } package JobQueue; sub new { my $class = shift; my $commandref = shift; my $read_from = shift; my $context_replace = shift; my $max_number_of_args = shift; my $transfer_files = shift; my $return_files = shift; my $commandlinequeue = CommandLineQueue->new ($commandref, $read_from, $context_replace, $max_number_of_args, $transfer_files, $return_files); my @unget = (); return bless { 'unget' => \@unget, 'commandlinequeue' => $commandlinequeue, 'this_job_no' => 0, 'total_jobs' => undef, }, ref($class) || $class; } sub get { my $self = shift; $self->{'this_job_no'}++; if(@{$self->{'unget'}}) { return shift @{$self->{'unget'}}; } else { my $commandline = $self->{'commandlinequeue'}->get(); if(defined $commandline) { return Job->new($commandline); } else { $self->{'this_job_no'}--; return undef; } } } sub unget { my $self = shift; unshift @{$self->{'unget'}}, @_; $self->{'this_job_no'} -= @_; } sub empty { my $self = shift; my $empty = (not @{$self->{'unget'}}) && $self->{'commandlinequeue'}->empty(); ::debug("run", "JobQueue->empty $empty "); return $empty; } sub total_jobs { my $self = shift; if(not defined $self->{'total_jobs'}) { if($opt::pipe) { ::error("--pipe is incompatible with --eta/--bar/--shuf"); ::wait_and_exit(255); } my $record; my @arg_records; my $record_queue = $self->{'commandlinequeue'}{'arg_queue'}; my $start = time; while($record = $record_queue->get()) { if(time - $start > 10) { ::warning("Reading ".scalar(@arg_records). " arguments took longer than 10 seconds."); $opt::eta && ::warning("Consider removing --eta."); $opt::bar && ::warning("Consider removing --bar."); $opt::shuf && ::warning("Consider removing --shuf."); last; } push @arg_records, $record; } while($record = $record_queue->get()) { push @arg_records, $record; } if($opt::shuf) { my $i = @arg_records; while (--$i) { my $j = int rand($i+1); @arg_records[$i,$j] = @arg_records[$j,$i]; } } $record_queue->unget(@arg_records); $self->{'total_jobs'} = ::ceil((1+$#arg_records+$self->{'this_job_no'}) / ::max($Global::max_number_of_args,1)); ::debug("init","Total jobs: ".$self->{'total_jobs'}. 
" (".(1+$#arg_records)."+".$self->{'this_job_no'}.")\n"); } return $self->{'total_jobs'}; } sub next_seq { my $self = shift; return $self->{'commandlinequeue'}->seq(); } sub quote_args { my $self = shift; return $self->{'commandlinequeue'}->quote_args(); } package Job; sub new { my $class = shift; my $commandlineref = shift; return bless { 'commandline' => $commandlineref, # CommandLine object 'workdir' => undef, # --workdir # filehandle for stdin (used for --pipe) # filename for writing stdout to (used for --files) # remaining data not sent to stdin (used for --pipe) # amount of data sent via stdin (used for --pipe) 'transfersize' => 0, # size of files using --transfer 'returnsize' => 0, # size of files using --return 'pid' => undef, # hash of { SSHLogins => number of times the command failed there } 'failed' => undef, 'sshlogin' => undef, # The commandline wrapped with rsync and ssh 'sshlogin_wrap' => undef, 'exitstatus' => undef, 'exitsignal' => undef, # Timestamp for timeout if any 'timeout' => undef, 'virgin' => 1, }, ref($class) || $class; } sub replaced { my $self = shift; $self->{'commandline'} or ::die_bug("commandline empty"); return $self->{'commandline'}->replaced(); } sub seq { my $self = shift; return $self->{'commandline'}->seq(); } sub set_seq { my $self = shift; return $self->{'commandline'}->set_seq(shift); } sub slot { my $self = shift; return $self->{'commandline'}->slot(); } { my($cattail); sub cattail { # Returns: # $cattail = perl program for: # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink] if(not $cattail) { $cattail = q{ # cat followed by tail (possibly with rm as soon at the file is opened) # If $writerpid dead: finish after this round use Fcntl; $|=1; my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV; if($read_file) { open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); } else { *IN = *STDIN; } while(! -s $comfile) { # Writer has not opened the buffer file, so we cannot remove it yet $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); usleep($sleep); } # The writer and we have both opened the file, so it is safe to unlink it unlink $unlink_file; unlink $comfile; my $first_round = 1; my $flags; fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle $flags |= O_NONBLOCK; # Add non-blocking to the flags fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle while(1) { # clear EOF seek(IN,0,1); my $writer_running = kill 0, $writerpid; $read = sysread(IN,$buf,131072); if($read) { if($first_round) { # Only start the command if there any input to process $first_round = 0; open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); } # Blocking print while($buf) { my $bytes_written = syswrite(OUT,$buf); # syswrite may be interrupted by SIGHUP substr($buf,0,$bytes_written) = ""; } # Something printed: Wait less next time $sleep /= 2; } else { if(eof(IN) and not $writer_running) { # Writer dead: There will never be more to read => exit exit; } # TODO This could probably be done more efficiently using select(2) # Nothing read: Wait longer before next read # Up to 100 milliseconds $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep); usleep($sleep); } } sub usleep { # Sleep this many milliseconds. 
my $secs = shift; select(undef, undef, undef, $secs/1000); } }; $cattail =~ s/#.*//mg; $cattail =~ s/\s+/ /g; } return $cattail; } } sub openoutputfiles { # Open files for STDOUT and STDERR # Set file handles in $self->fh my $self = shift; my ($outfhw, $errfhw, $outname, $errname); if($opt::results) { my $args_as_dirname = $self->{'commandline'}->args_as_dirname(); # Output in: prefix/name1/val1/name2/val2/stdout my $dir = $opt::results."/".$args_as_dirname; if(eval{ File::Path::mkpath($dir); }) { # OK } else { # mkpath failed: Argument probably too long. # Set $Global::max_file_length, which will keep the individual # dir names shorter than the max length max_file_name_length($opt::results); $args_as_dirname = $self->{'commandline'}->args_as_dirname(); # prefix/name1/val1/name2/val2/ $dir = $opt::results."/".$args_as_dirname; File::Path::mkpath($dir); } # prefix/name1/val1/name2/val2/seq my $seqname = "$dir/seq"; my $seqfhw; if(not open($seqfhw, "+>", $seqname)) { ::error("Cannot write to `$seqname'."); ::wait_and_exit(255); } print $seqfhw $self->seq(); close $seqfhw; # prefix/name1/val1/name2/val2/stdout $outname = "$dir/stdout"; if(not open($outfhw, "+>", $outname)) { ::error("Cannot write to `$outname'."); ::wait_and_exit(255); } # prefix/name1/val1/name2/val2/stderr $errname = "$dir/stderr"; if(not open($errfhw, "+>", $errname)) { ::error("Cannot write to `$errname'."); ::wait_and_exit(255); } $self->set_fh(1,"unlink",""); $self->set_fh(2,"unlink",""); if($opt::sqlworker) { # Save the filenames in SQL table $Global::sql->update("SET Stdout = ? WHERE Seq = ".$self->seq(), $outname); $Global::sql->update("SET Stderr = ? WHERE Seq = ".$self->seq(), $errname); } } elsif(not $opt::ungroup) { # To group we create temporary files for STDOUT and STDERR # To avoid the cleanup unlink the files immediately (but keep them open) if(@Global::tee_jobs) { # files must be removed when the tee is done } elsif($opt::files) { ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); # --files => only remove stderr $self->set_fh(1,"unlink",""); $self->set_fh(2,"unlink",$errname); } else { ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); $self->set_fh(1,"unlink",$outname); $self->set_fh(2,"unlink",$errname); } } else { # --ungroup open($outfhw,">&",$Global::fd{1}) || die; open($errfhw,">&",$Global::fd{2}) || die; # File name must be empty as it will otherwise be printed $outname = ""; $errname = ""; $self->set_fh(1,"unlink",$outname); $self->set_fh(2,"unlink",$errname); } # Set writing FD $self->set_fh(1,'w',$outfhw); $self->set_fh(2,'w',$errfhw); $self->set_fh(1,'name',$outname); $self->set_fh(2,'name',$errname); if($opt::compress) { $self->filter_through_compress(); } elsif(not $opt::ungroup) { $self->grouped(); } if($opt::linebuffer) { $self->set_non_blocking(); } } sub grouped { my $self = shift; # Set reading FD if using --group (--ungroup does not need) for my $fdno (1,2) { # Re-open the file for reading # so fdw can be closed seperately # and fdr can be seeked seperately (for --line-buffer) open(my $fdr,"<", $self->fh($fdno,'name')) || ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name')); $self->set_fh($fdno,'r',$fdr); # Unlink if required $Global::debug or unlink $self->fh($fdno,"unlink"); } } sub empty_input_wrapper { # If no input: exit(0) # If some input: Pass input as input to command on STDIN # This avoids starting the command if there is no input. 
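# For example (illustration only, with a hypothetical command):
#
#   my $wrapped = empty_input_wrapper("wc -c");
#   # printf 'abc' | $wrapped    -> starts "wc -c", which prints 3
#   # printf ''    | $wrapped    -> exits 0 without ever starting "wc -c"
#
# The wrapper reads a single byte first; only if that read succeeds is
# the real command spawned and the byte (plus the rest of STDIN)
# forwarded to it.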
# Input: # $command = command to pipe data to # Returns: # $wrapped_command = the wrapped command my $command = shift; my $script = ::spacefree(0,q{ if(sysread(STDIN, $buf, 1)) { open($fh, "|-", "@ARGV") || die; syswrite($fh, $buf); # Align up to 128k block if($read = sysread(STDIN, $buf, 131071)) { syswrite($fh, $buf); } while($read = sysread(STDIN, $buf, 131072)) { syswrite($fh, $buf); } close $fh; exit ($?&127 ? 128+($?&127) : 1+$?>>8) } }); ::debug("run",'Empty wrap: perl -e '.::shell_quote_scalar($script)."\n"); return 'perl -e '.::shell_quote_scalar($script)." ". ::shell_quote_scalar($Global::shell." -c ".::shell_quote_scalar($command)); } sub filter_through_compress { my $self = shift; # Send stdout to stdin for $opt::compress_program(1) # Send stderr to stdin for $opt::compress_program(2) # cattail get pid: $pid = $self->fh($fdno,'rpid'); my $cattail = cattail(); for my $fdno (1,2) { # Make a communication file. my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac"); close $fh; # Compressor: (echo > $comfile; compress pipe) > output # When the echo is written to $comfile, # it is known that output file is opened, # thus output file can then be removed by the decompressor. my $wpid = open(my $fdw,"|-", "(echo > $comfile; ". empty_input_wrapper($opt::compress_program).") >". $self->fh($fdno,'name')) || die $?; $self->set_fh($fdno,'w',$fdw); $self->set_fh($fdno,'wpid',$wpid); # Decompressor: open output; -s $comfile > 0: rm $comfile output; # decompress output > stdout my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile, $opt::decompress_program, $wpid, $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?; $self->set_fh($fdno,'r',$fdr); $self->set_fh($fdno,'rpid',$rpid); } } sub set_non_blocking { my $self = shift; $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; for my $fdno (1,2) { my $fdr = $self->fh($fdno,'r'); my $flags; fcntl($fdr, &::F_GETFL, $flags) || die $!; # Get the current flags on the filehandle $flags |= &::O_NONBLOCK; # Add non-blocking to the flags fcntl($fdr, &::F_SETFL, $flags) || die $!; # Set the flags on the filehandle } } sub max_file_name_length { # Figure out the max length of a subdir # TODO and the max total length # Ext4 = 255,130816 # Uses: # $Global::max_file_length is set # Returns: # $Global::max_file_length my $testdir = shift; my $upper = 8_000_000; # Dir length of 8 chars is supported everywhere my $len = 8; my $dir = "x"x$len; do { rmdir($testdir."/".$dir); $len *= 16; $dir = "x"x$len; } while ($len < $upper and mkdir $testdir."/".$dir); # Then search for the actual max length between $len/16 and $len my $min = $len/16; my $max = $len; while($max-$min > 5) { # If we are within 5 chars of the exact value: # it is not worth the extra time to find the exact value my $test = int(($min+$max)/2); $dir = "x"x$test; if(mkdir $testdir."/".$dir) { rmdir($testdir."/".$dir); $min = $test; } else { $max = $test; } } $Global::max_file_length = $min; return $min; } sub set_fh { # Set file handle my ($self, $fd_no, $key, $fh) = @_; $self->{'fd'}{$fd_no,$key} = $fh; } sub fh { # Get file handle my ($self, $fd_no, $key) = @_; return $self->{'fd'}{$fd_no,$key}; } sub write { my $self = shift; my $remaining_ref = shift; my $stdin_fh = $self->fh(0,"w"); my $len = length $$remaining_ref; # syswrite may not write all in one go, # so make sure everything is written. 
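# A single syswrite() on a pipe may accept only part of the buffer
# (pipes have a finite capacity, typically 64 KB on Linux), so the loop
# below keeps trimming the written prefix off $$remaining_ref until
# nothing is left. The same idiom in isolation (illustration only):
#
#   while(length $data) {
#       my $n = syswrite($fh, $data);
#       defined $n or die "syswrite: $!";
#       substr($data, 0, $n) = "";       # drop the bytes already accepted
#   }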
while($len) { my $written = syswrite($stdin_fh,$$remaining_ref); substr($$remaining_ref,0,$written) = ""; $len -= $written; } } sub set_block { # Copy stdin buffer from $block_ref up to $endpos # Prepend with $header_ref if virgin (i.e. not --roundrobin) # Remove $recstart and $recend if needed # Input: # $header_ref = ref to $header to prepend # $buffer_ref = ref to $buffer containing the block # $endpos = length of $block to pass on # $recstart = --recstart regexp # $recend = --recend regexp # Returns: # N/A my $self = shift; my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_; $self->{'block'} = ($self->virgin() ? $$header_ref : ""). substr($$buffer_ref,0,$endpos); if($opt::remove_rec_sep) { remove_rec_sep(\$self->{'block'},$recstart,$recend); } $self->{'block_length'} = length $self->{'block'}; $self->{'block_pos'} = 0; $self->add_transfersize($self->{'block_length'}); } sub block_ref { my $self = shift; return \$self->{'block'}; } sub block_length { my $self = shift; return $self->{'block_length'}; } sub remove_rec_sep { my ($block_ref,$recstart,$recend) = @_; # Remove record separator $$block_ref =~ s/$recend$recstart//gos; $$block_ref =~ s/^$recstart//os; $$block_ref =~ s/$recend$//os; } sub non_blocking_write { my $self = shift; my $something_written = 0; use POSIX qw(:errno_h); # for loop used to avoid copying substr: $buf will be an alias for the substr for my $buf (substr($self->{'block'},$self->{'block_pos'})) { my $in = $self->fh(0,"w"); my $rv = syswrite($in, $buf); if (!defined($rv) && $! == EAGAIN) { # would block $something_written = 0; } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) { # incomplete write # Remove the written part $self->{'block_pos'} += $rv; $something_written = $rv; } else { # successfully wrote everything my $a = ""; $self->set_block(\$a,\$a,0,"",""); $something_written = $rv; } } ::debug("pipe", "Non-block: ", $something_written); return $something_written; } sub virgin { my $self = shift; return $self->{'virgin'}; } sub set_virgin { my $self = shift; $self->{'virgin'} = shift; } sub pid { my $self = shift; return $self->{'pid'}; } sub set_pid { my $self = shift; $self->{'pid'} = shift; } sub starttime { # Returns: # UNIX-timestamp this job started my $self = shift; return sprintf("%.3f",$self->{'starttime'}); } sub set_starttime { my $self = shift; my $starttime = shift || ::now(); $self->{'starttime'} = $starttime; $opt::sqlworker and $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(), $starttime); } sub runtime { # Returns: # Run time in seconds with 3 decimals my $self = shift; return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000); } sub endtime { # Returns: # UNIX-timestamp this job ended # 0 if not ended yet my $self = shift; return ($self->{'endtime'} || 0); } sub set_endtime { my $self = shift; my $endtime = shift; $self->{'endtime'} = $endtime; $opt::sqlworker and $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(), $self->runtime()); } sub is_timedout { # Is the job timedout? 
# Input: # $delta_time = time that the job may run # Returns: # True or false my $self = shift; my $delta_time = shift; return time > $self->{'starttime'} + $delta_time; } sub kill { my $self = shift; $self->set_exitstatus(-1); ::kill_sleep_seq($self->pid()); } sub failed { # return number of times failed for this $sshlogin # Input: # $sshlogin # Returns: # Number of times failed for $sshlogin my $self = shift; my $sshlogin = shift; return $self->{'failed'}{$sshlogin}; } sub failed_here { # return number of times failed for the current $sshlogin # Returns: # Number of times failed for this sshlogin my $self = shift; return $self->{'failed'}{$self->sshlogin()}; } sub add_failed { # increase the number of times failed for this $sshlogin my $self = shift; my $sshlogin = shift; $self->{'failed'}{$sshlogin}++; } sub add_failed_here { # increase the number of times failed for the current $sshlogin my $self = shift; $self->{'failed'}{$self->sshlogin()}++; } sub reset_failed { # increase the number of times failed for this $sshlogin my $self = shift; my $sshlogin = shift; delete $self->{'failed'}{$sshlogin}; } sub reset_failed_here { # increase the number of times failed for this $sshlogin # # VGhlIGZpcnN0IGNvb2tpZSB3YXMgcmVsZWFzZWQgb24gMjAxMS0wMS0yNCBh # bmQgd2FzIHdvbiBieSBBRXZhcgpBcm5mam9yZCBCamFybWFzb24gb24gMjAx # MS0wNC0xMC4gVGhlIHNlY29uZCB3YXMgcmVsZWFzZWQgb24KMjAxMy0wOC0x # OCBhbmQgd29uIG9uIDIwMTUtMDctMTkgYnkgTWFyayBNYWltb25lLgoKRm9y # IHRoaXMgY29va2llIHlvdSBoYXZlIHRvIHVzZSBiYXNlNjQgdG8gZGVjb2Rl # LiBQbGVhc2UgZW1haWwKY29va2llQHRhbmdlLmRrIHdoZW4geW91IHJlYWQg # dGhpcy4KCkkgYW0gdGhlIG1haW50YWluZXIgb2YgYSBwaWVjZSBvZiBmcmVl # IHNvZnR3YXJlIGNhbGxlZCBHTlUKUGFyYWxsZWwuIEZyZWUgc29mdHdhcmUg # Z3VhcmFudGVlcyB5b3UgYWNjZXNzIHRvIHRoZSBzb3VyY2UKY29kZSwgYnV0 # IEkgaGF2ZSBiZWVuIHdvbmRlcmluZyBob3cgbWFueSBhY3R1YWxseSBfcmVh # ZF8gdGhlCnNvdXJjZSBjb2RlLgoKVG8gdGVzdCB0aGlzIEkgcHV0IGluIGEg # Y29tbWVudCB0ZWxsaW5nIHBlb3BsZSB0byBlbWFpbCBtZSB3aGVuCnRoZXkg # cmVhZCB0aGlzLiBUaGUgY29tbWVudCB3YXMgcHV0IGluIGEgc2VjdGlvbiBv # ZiB0aGUgY29kZQp0aGF0IG5vIG9uZSB3b3VsZCBsb29rIHRvIGZpeCBvciBp # bXByb3ZlIHRoZSBzb2Z0d2FyZSAtIHNvIHRoZQpzb3VyY2UgY29kZSBlcXVp # dmFsZW50IHRvIGEgZHVzdHkgY29ybmVyLiBUbyBtYWtlIHN1cmUgdGhlCmNv # bW1lbnQgd291bGQgbm90IHNob3cgdXAgaWYgc29tZSBvbmUganVzdCBncmVw # cGVkIHRocm91Z2ggdGhlCnNvdXJjZSBjb2RlIEkgcm90MTMnZWQgdGhlIHNv # dXJjZSBjb2RlCmh0dHA6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvUk9UMTMK # CjIuNSBtb250aHMgbGF0ZXIgSSByZWNlaXZlZCBhbiBlbWFpbCBmcm9tIHNv # bWVvbmUgd2hvIG5vdCBvbmx5Cm1hbmFnZWQgdG8gZmluZCB0aGUgY29tbWVu # dCwgYnV0IGFsc28gbWFuYWdlZCB0byBndWVzcyB0aGUgY29kZQpoYWQgdG8g # YmUgcm90MTMnZWQuCgpUbyBzZWUgaWYgdGhpcyB3YXMgYSBmbHVrZSBldmVu # dCBJIHJldHJpZWQgdGhlIHRlc3QuIFRoaXMgdGltZSAKaXQgdG9vayBhIGxp # dHRsZSBsb25nZXIsIGJ1dCBhZnRlciAyMyBtb250aHMgaXQgdG9vIHdhcyBm # b3VuZC4KClRoaXMgYnJpbmdzIG1lIHRvIHRoZSBjb25jbHVzaW9uIHRoYXQg # dGhlcmUgX2FyZV8gcGVvcGxlLCB3aG8KYXJlIG5vdCBhZmZpbGlhdGVkIHdp # dGggdGhlIHByb2plY3QsIHRoYXQgd2lsbCByZWFkIHRoZSBzb3VyY2UKY29k # ZSAtIHRob3VnaCBpdCBtYXkgbm90IGhhcHBlbiBhbGwgdGhlIHRpbWUuCg== my $self = shift; delete $self->{'failed'}{$self->sshlogin()}; } sub min_failed { # Returns: # the number of sshlogins this command has failed on # the minimal number of times this command has failed my $self = shift; my $min_failures = ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; return ($number_of_sshlogins_failed_on,$min_failures); } sub total_failed { # Returns: # $total_failures = the number of times this command has failed my 
$self = shift; my $total_failures = 0; for (values %{$self->{'failed'}}) { $total_failures += $_; } return $total_failures; } { my $script; sub postpone_exit_and_cleanup { # Command to remove files and dirs (given as args) without # affecting the exit value in $?/$status. if(not $script) { $script = "perl -e '". ::spacefree(0,q{ $bash=shift; $csh=shift; for(@ARGV){ unlink; rmdir; } if($bash=~s/h//) { exit $bash; } exit $csh; }). "' ".'"$?h" "$status" '; } return $script } } { my $script; sub fifo_wrap { # Script to create a fifo, run a command on the fifo # while copying STDIN to the fifo, and finally # remove the fifo and return the exit code of the command. if(not $script) { # {} == $PARALLEL_TMP for --fifo # To make it csh compatible a wrapper needs to: # * mkfifo # * spawn $command & # * cat > fifo # * waitpid to get the exit code from $command # * be less than 1000 chars long $script = "perl -e '". (::spacefree (0, q{ ($s,$c,$f) = @ARGV; # mkfifo $PARALLEL_TMP system "mkfifo", $f; # spawn $shell -c $command & $pid = fork || exec $s, "-c", $c; open($o,">",$f) || die $!; # cat > $PARALLEL_TMP while(sysread(STDIN,$buf,131072)){ syswrite $o, $buf; } close $o; # waitpid to get the exit code from $command waitpid $pid,0; # Cleanup unlink $f; exit $?/256; }))."'"; } return $script; } } sub wrapped { # Wrap command with: # * --shellquote # * --nice # * --cat # * --fifo # * --sshlogin # * --pipepart (@Global::cat_partials) # * --pipe # * --tmux # The ordering of the wrapping is important: # * --nice/--cat/--fifo should be done on the remote machine # * --pipepart/--pipe should be done on the local machine inside --tmux # Uses: # $Global::envvar # $opt::shellquote # $opt::nice # $Global::shell # $opt::cat # $opt::fifo # @Global::cat_partials # $opt::pipe # $opt::tmux # Returns: # $self->{'wrapped'} = the command wrapped with the above my $self = shift; if(not defined $self->{'wrapped'}) { my $command = $self->replaced(); if($opt::shellquote) { # Prepend /bin/echo (echo no-/bin is wrong in csh) # and quote twice $command = "/bin/echo " . ::shell_quote_scalar(::shell_quote_scalar($command)); } if($opt::cat) { # In '--cat' and '--fifo' {} == $PARALLEL_TMP. # This is to make it possible to compute $PARALLEL_TMP on # the fly when running remotely. # $ENV{PARALLEL_TMP} is set in the remote wrapper before # the command is run. # # Prepend 'cat > $PARALLEL_TMP;' # Append 'unlink $PARALLEL_TMP without affecting $?' $command = 'cat > $PARALLEL_TMP;'. $command.";". postpone_exit_and_cleanup(). '$PARALLEL_TMP'; } elsif($opt::fifo) { # Prepend 'mkfifo {}; (' # Append ') & cat > {}; wait; ' # Append 'unlink {} without affecting $?' $command = fifo_wrap(). " ". $Global::shell. " ". ::shell_quote_scalar($command). ' $PARALLEL_TMP'. 
';'; } if($ENV{'PARALLEL_ENV'}) { if(-e $ENV{'PARALLEL_ENV'}) { # This is a file/fifo: Replace envvar with content of file open(my $parallel_env, "<", $ENV{'PARALLEL_ENV'}) || ::die_bug("Cannot read parallel_env from $ENV{'PARALLEL_ENV'}"); local $/; $ENV{'PARALLEL_ENV'} = <$parallel_env>; close $parallel_env; } # If $PARALLEL_ENV set, put that in front of the command # Used for importing functions for fish # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV $ENV{'PARALLEL_ENV'} =~ s/\001/\n/g; $command = $ENV{'PARALLEL_ENV'}."\n".$command; } # Wrap with ssh + tranferring of files $command = $self->sshlogin_wrap($command); if(@Global::cat_partials) { # Prepend: # < /tmp/foo perl -e 'while(@ARGV) { # sysseek(STDIN,shift,0) || die; $left = shift; # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){ # $left -= $read; syswrite(STDOUT,$buf); # } # }' 0 0 0 11 | $command = (shift @Global::cat_partials). " | ($command)"; } elsif($opt::pipe) { # Wrap with EOF-detector to avoid starting $command if EOF. $command = empty_input_wrapper($command); } if($opt::tmux) { # Wrap command with 'tmux' $command = $self->tmux_wrap($command); } if($Global::cshell and length $command > 499) { # csh does not like words longer than 1000 (499 quoted) # bzip2 breaks --sql mysql://... # $command = "perl -e '".base64_zip_eval()."' ". # join" ",string_zip_base64('exec "'.::perl_quote_scalar($command).'"'); $command = "perl -e '".base64_eval()."' ". join" ",string_base64('exec "'.::perl_quote_scalar($command).'"'); } $self->{'wrapped'} = $command; } return $self->{'wrapped'}; } sub set_sshlogin { my $self = shift; my $sshlogin = shift; $self->{'sshlogin'} = $sshlogin; delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong delete $self->{'wrapped'}; $opt::sqlworker and $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $sshlogin->string()); } sub sshlogin { my $self = shift; return $self->{'sshlogin'}; } sub string_base64 { # Base64 encode it into 1000 byte blocks. # 1000 bytes is the largest word size csh supports # Input: # @strings = to be encoded # Returns: # @base64 = 1000 byte block $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; my @base64 = unpack("(A1000)*",encode_base64((join"",@_),"")); return @base64; } sub string_zip_base64 { # Pipe string through 'bzip2 -9' and base64 encode it into 1000 # byte blocks. # 1000 bytes is the largest word size csh supports # Input: # @strings = to be encoded # Returns: # @base64 = 1000 byte block my($zipin_fh, $zipout_fh,@base64); ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9"); if(fork) { close $zipin_fh; $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; # Split base64 encoded into 1000 byte blocks @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),"")); close $zipout_fh; } else { close $zipout_fh; print $zipin_fh @_; close $zipin_fh; exit; } ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n"); return @base64; } sub base64_zip_eval { # Script that: # * reads base64 strings from @ARGV # * decodes them # * pipes through 'bzip2 -dc' # * evals the result # Reverse of string_zip_base64 + eval # Will be wrapped in ' so single quote is forbidden # Returns: # $script = 1-liner for perl -e my $script = ::spacefree(0,q{ @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); eval "@GNU_Parallel"; $SIG{CHLD}="IGNORE"; # Search for bzip2. 
Not found => use default path my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2"; # $in = stdin on $zip, $out = stdout from $zip my($in, $out,$eval); open3($in,$out,">&STDERR",$zip,"-dc"); if(my $perlpid = fork) { close $in; $eval = join "", <$out>; close $out; } else { close $out; # Pipe decoded base64 into 'bzip2 -dc' print $in (decode_base64(join"",@ARGV)); close $in; exit; } wait; eval $eval; }); ::debug("base64",$script,"\n"); return $script; } sub base64_eval { # Script that: # * reads base64 strings from @ARGV # * decodes them # * evals the result # Reverse of string_base64 + eval # Will be wrapped in ' so single quote is forbidden # Returns: # $script = 1-liner for perl -e my $script = ::spacefree(0,q{ @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); eval "@GNU_Parallel"; my $eval; $eval = decode_base64(join"",@ARGV); eval $eval; }); ::debug("base64",$script,"\n"); return $script; } sub sshlogin_wrap { # Wrap the command with the commands needed to run remotely # Input: # $command = command to run # Returns: # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands sub monitor_parent_sshd_script { # This script is to solve the problem of # * not mixing STDERR and STDOUT # * terminating with ctrl-c # If its parent is ssh: all good # If its parent is init(1): ssh died, so kill children my $monitor_parent_sshd_script; if(not $monitor_parent_sshd_script) { $monitor_parent_sshd_script = # This will be packed in ', so only use " ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'. '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'. '$nice = '.$opt::nice.';'. q{ # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR do { $ENV{PARALLEL_TMP} = $tmpdir."/par". join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); } while(-e $ENV{PARALLEL_TMP}); $SIG{CHLD} = sub { $done = 1; }; $pid = fork; unless($pid) { # Make own process group to be able to kill HUP it later setpgrp; eval { setpriority(0,0,$nice) }; exec $shell, "-c", ($bashfunc."@ARGV"); die "exec: $!\n"; } do { # Parent is not init (ppid=1), so sshd is alive # Exponential sleep up to 1 sec $s = $s < 1 ? 0.001 + $s * 1.03 : $s; select(undef, undef, undef, $s); } until ($done || getppid == 1); # Kill HUP the process group if job not done kill(SIGHUP, -${pid}) unless $done; wait; exit ($?&127 ? 128+($?&127) : 1+$?>>8) }); } return $monitor_parent_sshd_script; } sub vars_to_export { # Uses: # @opt::env my @vars = ("parallel_bash_environment"); for my $varstring (@opt::env) { # Split up --env VAR1,VAR2 push @vars, split /,/, $varstring; } for (@vars) { if(-r $_ and not -d) { # Read as environment definition bug #44041 # TODO parse this my $fh = ::open_or_exit($_); $Global::envdef = join("",<$fh>); close $fh; } } if(grep { /^_$/ } @vars) { # --env _ # Include all vars that are not in a clean environment if(open(my $vars_fh, "<", $ENV{'HOME'} . 
"/.parallel/ignored_vars")) { my @ignore = <$vars_fh>; chomp @ignore; my %ignore; @ignore{@ignore} = @ignore; close $vars_fh; push @vars, grep { not defined $ignore{$_} } keys %ENV; @vars = grep { not /^_$/ } @vars; } else { ::error("Run '$Global::progname --record-env' in a clean environment first."); ::wait_and_exit(255); } } # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2) # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%% push(@vars, "PARALLEL_PID", "PARALLEL_SEQ", map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars); # Keep only defined variables return grep { defined($ENV{$_}) } @vars; } sub env_as_eval { # Returns: # $eval = '$ENV{"..."}=...; ...' my @vars = vars_to_export(); my $csh_friendly = not grep { /\n/ } @ENV{@vars}; my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars; # eval of @envset will set %ENV my $envset = join"", map { '$ENV{"'.::perl_quote_scalar($_).'"}="'. ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions; # running @bashfunc on the command line, will set the functions my @bashfunc = map { my $v=$_; s/BASH_FUNC_(.*)(\(\)|%%)/$1/; "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions; # eval $bashfuncset will set $bashfunc my $bashfuncset; if(@bashfunc) { # Functions are not supported for all shells if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) { ::warning("Shell functions may not be supported in $Global::shell."); } $bashfuncset = '@bash_functions=qw('."@bash_functions".");". ::spacefree(1,q{ if($ENV{"SHELL"}=~/csh/) { print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n"; exec "false"; } }). "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";'; } else { $bashfuncset = '$bashfunc = "";' } if($ENV{"parallel_bash_environment"}) { $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";'; } ::debug("base64",$envset,$bashfuncset,"\n"); return $csh_friendly,$envset,$bashfuncset; } my $self = shift; my $command = shift; # TODO test that *sh -c 'parallel --env' use *sh if(not defined $self->{'sshlogin_wrap'}) { my $sshlogin = $self->sshlogin(); my $serverlogin = $sshlogin->serverlogin(); my $quoted_remote_command; $ENV{'PARALLEL_SEQ'} = $self->seq(); $ENV{'PARALLEL_PID'} = $$; if($serverlogin eq ":") { if($opt::workdir) { # Create workdir if needed. Then cd to it. my $wd = $self->workdir(); if($opt::workdir eq "." or $opt::workdir eq "...") { # If $wd does not start with '/': Prepend $HOME $wd =~ s:^([^/]):$ENV{'HOME'}/$1:; } ::mkdir_or_die($wd); $command = "cd ".::shell_quote_scalar($wd)." || exit 255; ".$command; } if(@opt::env) { # Prepend with environment setter, which sets functions in zsh my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); my $env_command = $envset.$bashfuncset. '@ARGV="'.::perl_quote_scalar($command).'";'. "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;"; if(length $env_command > 999 or not $csh_friendly or $command =~ /\n/) { # csh does not deal well with > 1000 chars in one word # csh does not deal well with $ENV with \n # bzip2 breaks --sql mysql://... # $env_command = "perl -e '".base64_zip_eval()."' ". # join" ",string_zip_base64($env_command); $env_command = "perl -e '".base64_eval()."' ". 
join" ",string_base64($env_command); $self->{'sshlogin_wrap'} = $env_command; } else { $self->{'sshlogin_wrap'} = "perl -e ".::shell_quote_scalar($env_command); } } else { $self->{'sshlogin_wrap'} = $command; } } else { my $pwd = ""; if($opt::workdir) { # Create remote workdir if needed. Then cd to it. my $wd = $self->workdir(); $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}. qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;}; } my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); my $remote_command = $pwd.$envset.$bashfuncset. '@ARGV="'.::perl_quote_scalar($command).'";'. monitor_parent_sshd_script(); $quoted_remote_command = "perl -e ".::shell_quote_scalar($remote_command); my $dq_remote_command = ::shell_quote_scalar($quoted_remote_command); if(length $dq_remote_command > 999 or not $csh_friendly or $command =~ /\n/) { # csh does not deal well with > 1000 chars in one word # csh does not deal well with $ENV with \n # bzip2 breaks --sql mysql://... # $quoted_remote_command = "perl -e \\''".base64_zip_eval()."'\\' "."". # join" ",string_zip_base64($remote_command); $quoted_remote_command = "perl -e \\''".base64_eval()."'\\' ". join" ",string_base64($remote_command); } else { $quoted_remote_command = $dq_remote_command; } my $sshcmd = $sshlogin->sshcommand(); my ($pre,$post,$cleanup)=("","",""); # --transfer $pre .= $self->sshtransfer(); # --return $post .= $self->sshreturn(); # --cleanup $post .= $self->sshcleanup(); if($post) { # We need to save the exit status of the job $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;'; } $self->{'sshlogin_wrap'} = ($pre . "$sshcmd $serverlogin -- exec " . $quoted_remote_command . ";" . $post); } } return $self->{'sshlogin_wrap'}; } sub transfer { # Files to transfer # Non-quoted and with {...} substituted # Returns: # @transfer - File names of files to transfer my $self = shift; $self->{'transfersize'} = 0; my @transfer = $self->{'commandline'}-> replace_placeholders($self->{'commandline'}{'transfer_files'},0,0); for(@transfer) { # filesize if(-e $_) { $self->{'transfersize'} += (stat($_))[7]; } } return @transfer; } sub transfersize { my $self = shift; return $self->{'transfersize'}; } sub add_transfersize { my $self = shift; my $transfersize = shift; $self->{'transfersize'} += $transfersize; $opt::sqlworker and $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(), $self->{'transfersize'}); } sub sshtransfer { # Returns for each transfer file: # rsync $file remote:$workdir my $self = shift; my @pre; my $sshlogin = $self->sshlogin(); my $workdir = $self->workdir(); for my $file ($self->transfer()) { push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; } return join("",@pre); } sub return { # Files to return # Non-quoted and with {...} substituted # Returns: # @non_quoted_filenames my $self = shift; return $self->{'commandline'}-> replace_placeholders($self->{'commandline'}{'return_files'},0,0); } sub returnsize { # This is called after the job has finished # Returns: # $number_of_bytes transferred in return my $self = shift; for my $file ($self->return()) { if(-e $file) { $self->{'returnsize'} += (stat($file))[7]; } } return $self->{'returnsize'}; } sub add_returnsize { my $self = shift; my $returnsize = shift; $self->{'returnsize'} += $returnsize; $opt::sqlworker and $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(), $self->{'returnsize'}); } sub sshreturn { # Returns for each return-file: # rsync remote:$workdir/$file . 
my $self = shift; my $sshlogin = $self->sshlogin(); my $sshcmd = $sshlogin->sshcommand(); my $serverlogin = $sshlogin->serverlogin(); my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd); my $pre = ""; for my $file ($self->return()) { $file =~ s:^\./::g; # Remove ./ if any my $relpath = ($file !~ m:^/:); # Is the path relative? my $cd = ""; my $wd = ""; if($relpath) { # rsync -avR /foo/./bar/baz.c remote:/tmp/ # == (on old systems) # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ $wd = ::shell_quote_file($self->workdir()."/"); } # Only load File::Basename if actually needed $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; # dir/./file means relative to dir, so remove dir on remote $file =~ m:(.*)/\./:; my $basedir = $1 ? ::shell_quote_file($1."/") : ""; my $nobasedir = $file; $nobasedir =~ s:.*/\./::; $cd = ::shell_quote_file(::dirname($nobasedir)); my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync"); my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file))); # --return # mkdir -p /home/tange/dir/subdir/; # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync" # server:file.gz /home/tange/dir/subdir/ $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:". $basename . " ".$basedir.$cd.";"; } return $pre; } sub sshcleanup { # Return the sshcommand needed to remove the file # Returns: # ssh command needed to remove files from sshlogin my $self = shift; my $sshlogin = $self->sshlogin(); my $sshcmd = $sshlogin->sshcommand(); my $serverlogin = $sshlogin->serverlogin(); my $workdir = $self->workdir(); my $cleancmd = ""; for my $file ($self->cleanup()) { my @subworkdirs = parentdirs_of($file); $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; } if(defined $opt::workdir and $opt::workdir eq "...") { $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::shell_quote_scalar($workdir).';'; } return $cleancmd; } sub cleanup { # Returns: # Files to remove at cleanup my $self = shift; if($opt::cleanup) { my @transfer = $self->transfer(); my @return = $self->return(); return (@transfer,@return); } else { return (); } } sub workdir { # Returns: # the workdir on a remote machine my $self = shift; if(not defined $self->{'workdir'}) { my $workdir; if(defined $opt::workdir) { if($opt::workdir eq ".") { # . means current dir my $home = $ENV{'HOME'}; eval 'use Cwd'; my $cwd = cwd(); $workdir = $cwd; if($home) { # If homedir exists: remove the homedir from # workdir if cwd starts with homedir # E.g. /home/foo/my/dir => my/dir # E.g. /tmp/my/dir => /tmp/my/dir my ($home_dev, $home_ino) = (stat($home))[0,1]; my $parent = ""; my @dir_parts = split(m:/:,$cwd); my $part; while(defined ($part = shift @dir_parts)) { $part eq "" and next; $parent .= "/".$part; my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; if($parent_dev == $home_dev and $parent_ino == $home_ino) { # dev and ino is the same: We found the homedir. $workdir = join("/",@dir_parts); last; } } } if($workdir eq "") { $workdir = "."; } } elsif($opt::workdir eq "...") { $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ . "-" . $self->seq(); } else { $workdir = $self->{'commandline'}-> replace_placeholders([$opt::workdir],0,0); #$workdir = $opt::workdir; # Rsync treats /./ special. 
We dont want that $workdir =~ s:/\./:/:g; # Remove /./ $workdir =~ s:(.)/+$:$1:; # Remove ending / if any $workdir =~ s:^\./::g; # Remove starting ./ if any } } else { $workdir = "."; } $self->{'workdir'} = ::shell_quote_scalar($workdir); } return $self->{'workdir'}; } sub parentdirs_of { # Return: # all parentdirs except . of this dir or file - sorted desc by length my $d = shift; my @parents = (); while($d =~ s:/[^/]+$::) { if($d ne ".") { push @parents, $d; } } return @parents; } sub start { # Setup STDOUT and STDERR for a job and start it. # Returns: # job-object or undef if job not to run sub open3_setpgrp_internal { # Run open3+setpgrp followed by the command # Input: # $stdin_fh = Filehandle to use as STDIN # $stdout_fh = Filehandle to use as STDOUT # $stderr_fh = Filehandle to use as STDERR # $command = Command to run # Returns: # $pid = Process group of job started my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; my $pid; local (*OUT,*ERR); open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); # The eval is needed to catch exception from open3 eval { if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) { # Each child gets its own process group to make it safe to killall setpgrp(0,0); exec("exec $Global::shell -c ".::shell_quote_scalar_default($command)) || ::die_bug("open3-$stdin_fh $command"); } }; return $pid; } sub open3_setpgrp_external { # Run open3 on $command wrapped with a perl script doing setpgrp # Works on systems that do not support open3(,,,"-") # Input: # $stdin_fh = Filehandle to use as STDIN # $stdout_fh = Filehandle to use as STDOUT # $stderr_fh = Filehandle to use as STDERR # $command = Command to run # Returns: # $pid = Process group of job started my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; local (*OUT,*ERR); open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); my $pid; my @setpgrp_wrap = ('perl','-e', "setpgrp\;eval\{setpriority\(0,0,$opt::nice\)\}\;". "exec '$Global::shell', '-c', \@ARGV"); # The eval is needed to catch exception from open3 eval { $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command) || ::die_bug("open3-$stdin_fh"); 1; }; return $pid; } sub open3_setpgrp { # If the OS supports open3(x,x,x,"-") use that # eval { if(not $pid=::open3($i,$o,$e,"-")) { exit } } # if $!: external # TODO build a selector that works with out side effects no warnings 'redefine'; if(1) { # Does not support open3(x,x,x,"-") *open3_setpgrp = \&open3_setpgrp_external; } else { # Supports open3(x,x,x,"-") *open3_setpgrp = \&open3_setpgrp_internal; } # The sub is now redefined. Call it return open3_setpgrp(@_); } my $job = shift; # Get the shell command to be executed (possibly with ssh infront). my $command = $job->wrapped(); my $pid; if($job->{'commandline'}{'skip'}) { # $job->skip() was called $command = "true"; } if($Global::interactive or $Global::stderr_verbose) { $job->interactive_start(); } $job->openoutputfiles(); my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); if($opt::ungroup or $opt::sqlworker) { print_dryrun_and_verbose($stdout_fh,$job,$command); } if($opt::dryrun or $opt::sql) { $command = "true"; } $ENV{'PARALLEL_SEQ'} = $job->seq(); $ENV{'PARALLEL_PID'} = $$; $ENV{'PARALLEL_TMP'} = ::tmpname("par"); ::debug("run", $Global::total_running, " processes . 
Starting (", $job->seq(), "): $command\n"); if($opt::pipe) { my ($stdin_fh) = ::gensym(); $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command); $job->set_fh(0,"w",$stdin_fh); } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and open(my $devtty_fh, "<", "/dev/tty")) { # Give /dev/tty to the command if no one else is using it # The eval is needed to catch exception from open3 eval { no warnings; local (*IN) = $devtty_fh; local (*OUT,*ERR); open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); $pid = ::open3("<&IN", ">&OUT", ">&ERR", "exec $Global::shell -c ". ::shell_quote_scalar_default($command)) || ::die_bug("open3-/dev/tty"); $Global::tty_taken = $pid; close $devtty_fh; 1; }; } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1 and $job->sshlogin()->string() eq ":") { # Give STDIN to the first job if using -a (but only if running # locally - otherwise CTRL-C does not work for other jobs Bug#36585) local (*IN,*OUT,*ERR); open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); *IN = *STDIN; # The eval is needed to catch exception from open3 my @setpgrp_wrap = ('perl','-e', "setpgrp\;eval\{setpriority\(0,0,$opt::nice\)\}\;". "exec '$Global::shell', '-c', \@ARGV"); eval { $pid = ::open3("<&IN", ">&OUT", ">&ERR", @setpgrp_wrap, $command) || ::die_bug("open3-<IN"); 1; }; # Re-open to avoid complaining open(STDIN, "<&", $Global::original_stdin) or ::die_bug("dup-\$Global::original_stdin: $!"); } else { $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command); } if($pid) { # A job was started $Global::total_running++; $Global::total_started++; $job->set_pid($pid); $job->set_starttime(); $Global::running{$job->pid()} = $job; if($opt::timeout) { $Global::timeoutq->insert($job); } $Global::newest_job = $job; $Global::newest_starttime = ::now(); return $job; } else { # No more processes ::debug("run", "Cannot spawn more jobs.\n"); return undef; } } sub interactive_start { my $self = shift; my $command = $self->wrapped(); if($Global::interactive) { ::status("$command ?..."); open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty"); my $answer = <$tty_fh>; close $tty_fh; my $run_yes = ($answer =~ /^\s*y/i); if (not $run_yes) { $self->{'commandline'}->skip(); } } else { print $Global::original_stderr "$command\n"; } } sub print_dryrun_and_verbose { # For $opt::ungroup we print these ASAP # For $opt::group they are part of print() my $stdout_fh = shift; my $job = shift; my $command = shift; if($opt::dryrun or $Global::verbose) { if($Global::verbose <= 1) { print $stdout_fh $job->replaced(),"\n"; } else { # Verbose level > 1: Print the rsync and stuff print $stdout_fh $command,"\n"; } } if($opt::sqlworker) { $Global::sql->update("SET Command = ? WHERE Seq = ".$job->seq(), $job->replaced()); } } { my $tmuxsocket; sub tmux_wrap { # Wrap command with tmux for session pPID # Input: # $actual_command = the actual command being run (incl ssh wrap) my $self = shift; my $actual_command = shift; # Temporary file name. Used for fifo to communicate exit val my $tmpfifo=::tmpname("tmx"); if(length($tmpfifo) >=100) { ::error("tmux does not support sockets with path > 100."); ::wait_and_exit(255); } my $visual_command = $self->replaced(); my $title = $visual_command; if($visual_command =~ /\0/) { ::error("Command line contains NUL. 
tmux is confused by NUL."); ::wait_and_exit(255); } # ; causes problems # ascii 194-245 annoys tmux $title =~ tr/[\011-\016;\302-\365]//d; $title = ::shell_quote_scalar($title); my $l_act = length($actual_command); my $l_tit = length($title); my $l_fifo = length($tmpfifo); # The line to run contains a 118 chars extra code + the title 2x my $l_tot = 2 * $l_tit + $l_act + $l_fifo; while($l_tit < 1000 and ( (890 < $l_tot and $l_tot < 1350) or (9250 < $l_tot and $l_tot < 9800) )) { # tmux blocks for certain lengths: # 900 < title + command < 1200 # 9250 < title + command < 9800 # but only if title < 1000, so expand the title with 75 spaces # The measured lengths are: # 996 < (title + whole command) < 1127 # 9331 < (title + whole command) < 9636 $title = $title.('\ 'x75); $l_tit = length($title); $l_tot = 2 * $l_tit + $l_act + $l_fifo; } my $tmux; $ENV{'TMUX'} ||= "tmux"; if(not $tmuxsocket) { $tmuxsocket = ::tmpname("tms"); ::status("See output with: $ENV{'TMUX'} -S $tmuxsocket attach\n"); } $tmux = "sh -c '". $ENV{'TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" . $ENV{'TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title"; ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ", $Limits::Command::line_max_len, " tot ", $l_tot, "\n"); return "mkfifo $tmpfifo && $tmux ". # Run in tmux ::shell_quote_scalar ( "(".$actual_command.');'. # The triple print is needed - otherwise the testsuite fails q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&". "echo $title; echo \007Job finished at: `date`;sleep 10" ). # Run outside tmux # Read a / separated line: 0h/2 for csh, 2/0 for bash. # If csh the first will be 0h, so use the second as exit value. # Otherwise just use the first value as exit value. q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo; } } sub is_already_in_results { # Do we already have results for this job? # Returns: # $job_already_run = bool whether there is output for this or not my $job = $_[0]; my $args_as_dirname = $job->{'commandline'}->args_as_dirname(); # prefix/name1/val1/name2/val2/ my $dir = $opt::results."/".$args_as_dirname; ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n"); return -e "$dir/stdout"; } sub is_already_in_joblog { my $job = shift; return vec($Global::job_already_run,$job->seq(),1); } sub set_job_in_joblog { my $job = shift; vec($Global::job_already_run,$job->seq(),1) = 1; } sub should_be_retried { # Should this job be retried? # Returns # 0 - do not retry # 1 - job queued for retry my $self = shift; if (not $opt::retries) { return 0; } if(not $self->exitstatus() and not $self->exitsignal()) { # Completed with success. If there is a recorded failure: forget it $self->reset_failed_here(); return 0; } else { # The job failed. Should it be retried? 
$self->add_failed_here(); if($self->total_failed() == $opt::retries) { # This has been retried enough return 0; } else { # This command should be retried $self->set_endtime(undef); $self->reset_exitstatus(); $Global::JobQueue->unget($self); ::debug("run", "Retry ", $self->seq(), "\n"); return 1; } } } { my (%print_later,$job_end_sequence); sub print_earlier_jobs { # Print jobs whose output is postponed due to --keep-order # Returns: N/A my $job = shift; $print_later{$job->seq()} = $job; $job_end_sequence ||= 1; ::debug("run", "Looking for: $job_end_sequence ", "Current: ", $job->seq(), "\n"); for(my $j = $print_later{$job_end_sequence}; $j or vec($Global::job_already_run,$job_end_sequence,1); $job_end_sequence++, $j = $print_later{$job_end_sequence}) { ::debug("run", "Found job end $job_end_sequence"); if($j) { $j->print(); delete $print_later{$job_end_sequence}; } } } } sub print { # Print the output of the jobs # Returns: N/A my $self = shift; ::debug("print", ">>joboutput ", $self->replaced(), "\n"); if($opt::dryrun) { # Nothing was printed to this job: # cleanup tmp files if --files was set unlink $self->fh(1,"name"); } if($opt::pipe and $self->virgin()) { # Skip --joblog, --dryrun, --verbose } else { if($opt::ungroup and $Global::joblog and defined $self->{'exitstatus'}) { # Add to joblog when finished $self->print_joblog(); # Printing is only relevant for grouped/--line-buffer output. $opt::ungroup and return; } # Check for disk full ::exit_if_disk_full(); if(($opt::dryrun or $Global::verbose) and not $self->{'verbose_printed'} and not $opt::sql and not $opt::sqlworker) { $self->{'verbose_printed'}++; if($Global::verbose <= 1) { print STDOUT $self->replaced(),"\n"; } else { # Verbose level > 1: Print the rsync and stuff print STDOUT $self->wrapped(),"\n"; } # If STDOUT and STDERR are merged, # we want the command to be printed first # so flush to avoid STDOUT being buffered flush STDOUT; } } for my $fdno (sort { $a <=> $b } keys %Global::fd) { # Sort by file descriptor numerically: 1,2,3,..,9,10,11 $fdno == 0 and next; my $out_fd = $Global::fd{$fdno}; my $in_fh = $self->fh($fdno,"r"); if(not $in_fh) { if(not $Job::file_descriptor_warning_printed{$fdno}++) { # ::warning("File descriptor $fdno not defined\n"); } next; } ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n"); if($opt::files) { $self->files_print($fdno,$in_fh,$out_fd); } elsif($opt::linebuffer) { # Line buffered print out $self->linebuffer_print($fdno,$in_fh,$out_fd); } elsif($opt::tag or defined $opt::tagstring) { $self->tag_print($fdno,$in_fh,$out_fd); } else { $self->normal_print($fdno,$in_fh,$out_fd); } flush $out_fd; } ::debug("print", "<<joboutput @command\n"); if($Global::joblog and defined $self->{'exitstatus'} and not ($self->virgin() and $opt::pipe)) { # Add to joblog when finished $self->print_joblog(); } } sub files_print { my $self = shift; my ($fdno,$in_fh,$out_fd) = @_; # If the job is dead: close printing fh. Needed for --compress close $self->fh($fdno,"w"); if($? and $opt::compress) { ::error($opt::compress_program." 
failed."); $self->set_exitstatus(255); } if($opt::compress) { # Kill the decompressor which will not be needed CORE::kill "TERM", $self->fh($fdno,"rpid"); } close $in_fh; if($opt::pipe and $self->virgin()) { # Nothing was printed to this job: # cleanup unused tmp files if --files was set for my $fdno (1,2) { unlink $self->fh($fdno,"name"); unlink $self->fh($fdno,"unlink"); } } elsif($fdno == 1 and $self->fh($fdno,"name")) { print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n"; if($opt::sqlworker) { $Global::sql->update("SET Stdout = ? WHERE Seq = ".$self->seq(), $self->tag().$self->fh($fdno,"name")); } $self->add_returnsize(-s $self->fh($fdno,"name")); } } sub linebuffer_print { my $self = shift; my ($fdno,$in_fh,$out_fd) = @_; my $partial = \$self->{'partial_line',$fdno}; if(defined $self->{'exitstatus'}) { # If the job is dead: close printing fh. Needed for --compress close $self->fh($fdno,"w"); if($? and $opt::compress) { ::error($opt::compress_program." failed."); $self->set_exitstatus(255); } if($opt::compress) { # Blocked reading in final round $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; for my $fdno (1,2) { my $fdr = $self->fh($fdno,'r'); my $flags; fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle } } } # This seek will clear EOF seek $in_fh, tell($in_fh), 0; # The read is non-blocking: The $in_fh is set to non-blocking. # 32768 --tag = 5.1s # 327680 --tag = 4.4s # 1024000 --tag = 4.4s # 3276800 --tag = 4.3s # 10240000 --tag = 4.3s # 32768000 --tag = 4.7s my $outputlength = 0; while(read($in_fh,substr($$partial,length $$partial),3276800)) { # Append to $$partial # Find the last \n my $i = ::rindex64($partial,"\n"); if($i != -1) { # One or more complete lines were found if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) { # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt # This is a crappy way of ignoring it. $$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; # Length of partial line has changed: Find the last \n again $i = ::rindex64($partial,"\n"); } $outputlength += $i+1; if($opt::tag or defined $opt::tagstring) { # Replace ^ with $tag within the full line my $tag = $self->tag(); substr($$partial,0,$i+1) =~ s/^/$tag/gm; # Length of partial line has changed: Find the last \n again $i = ::rindex64($partial,"\n"); } # Print up to and including the last \n print $out_fd substr($$partial,0,$i+1); # Remove the printed part substr($$partial,0,$i+1) = ""; } } $self->add_returnsize($outputlength); if(defined $self->{'exitstatus'}) { # If the job is dead: print the remaining partial line # read remaining $self->add_returnsize(length $$partial); if($$partial and ($opt::tag or defined $opt::tagstring)) { my $tag = $self->tag(); $$partial =~ s/^/$tag/gm; } print $out_fd $$partial; # Release the memory undef $$partial; if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) { # decompress still running } else { # decompress done: close fh close $in_fh; if($? and $opt::compress) { ::error($opt::decompress_program." failed."); $self->set_exitstatus(255); } } } } sub tag_print { my $self = shift; my ($fdno,$in_fh,$out_fd) = @_; my $buf; local $/ = "\n"; close $self->fh($fdno,"w"); if($? and $opt::compress) { ::error($opt::compress_program." 
failed."); $self->set_exitstatus(255); } seek $in_fh, 0, 0; # $in_fh is now ready for reading at position 0 my $tag = $self->tag(); if($fdno == 2) { # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt # This is a crappy way of ignoring it. while(<$in_fh>) { if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) { # Skip } else { $self->add_returnsize(length $_); print $out_fd $tag,$_; } # At most run the loop once last; } } my $outputlength = 0; while(<$in_fh>) { print $out_fd $tag,$_; $outputlength += length $_; } if($fdno == 1) { $self->add_returnsize($outputlength); } close $in_fh; if($? and $opt::compress) { ::error($opt::decompress_program." failed."); $self->set_exitstatus(255); } } sub normal_print { my $self = shift; my ($fdno,$in_fh,$out_fd) = @_; my $buf; close $self->fh($fdno,"w"); if($? and $opt::compress) { ::error($opt::compress_program." failed."); $self->set_exitstatus(255); } seek $in_fh, 0, 0; # $in_fh is now ready for reading at position 0 my $outputlength = 0; my @output; while(sysread($in_fh,$buf,131072)) { print $out_fd $buf; $outputlength += length $buf; if($opt::sqlworker) { push @output, $buf; } } if($fdno == 1) { $self->add_returnsize($outputlength); if($opt::sqlworker and not $opt::results) { $Global::sql->update("SET Stdout = ? WHERE Seq = ".$self->seq(), join("",@output)); } } else { if($opt::sqlworker and not $opt::results) { $Global::sql->update("SET Stderr = ? WHERE Seq = ".$self->seq(), join("",@output)); } } close $in_fh; if($? and $opt::compress) { ::error($opt::decompress_program." failed."); $self->set_exitstatus(255); } } sub print_joblog { my $self = shift; my $cmd; if($Global::verbose <= 1) { $cmd = $self->replaced(); } else { # Verbose level > 1: Print the rsync and stuff $cmd = "@command"; } print $Global::joblog join("\t", $self->seq(), $self->sshlogin()->string(), $self->starttime(), sprintf("%10.3f",$self->runtime()), $self->transfersize(), $self->returnsize(), $self->exitstatus(), $self->exitsignal(), $cmd ). "\n"; flush $Global::joblog; $self->set_job_in_joblog(); } sub tag { my $self = shift; if($opt::tag or defined $opt::tagstring) { if(not defined $self->{'tag'}) { $self->{'tag'} = $self->{'commandline'}-> replace_placeholders([$opt::tagstring],0,0)."\t"; } } else { return ""; } return $self->{'tag'}; } sub hostgroups { my $self = shift; if(not defined $self->{'hostgroups'}) { $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; } return @{$self->{'hostgroups'}}; } sub exitstatus { my $self = shift; return $self->{'exitstatus'}; } sub set_exitstatus { my $self = shift; my $exitstatus = shift; if($exitstatus) { # Overwrite status if non-zero $self->{'exitstatus'} = $exitstatus; } else { # Set status but do not overwrite # Status may have been set by --timeout $self->{'exitstatus'} ||= $exitstatus; } $opt::sqlworker and $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(), $exitstatus); } sub reset_exitstatus { my $self = shift; undef $self->{'exitstatus'}; } sub exitsignal { my $self = shift; return $self->{'exitsignal'}; } sub set_exitsignal { my $self = shift; my $exitsignal = shift; $self->{'exitsignal'} = $exitsignal; $opt::sqlworker and $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(), $exitsignal); } { my $status_printed; my $total_jobs; sub should_we_halt { # Should we halt? Immediately? Gracefully? 
# Returns: N/A my $job = shift; # --halt # => 1..100 (number of jobs failed, 101 means > 100) # --halt % => 1..100 (pct of jobs failed) if($Global::halt_pct and not $Global::halt_count) { $total_jobs ||= $Global::JobQueue->total_jobs(); # From the pct compute the number of jobs that must fail/succeed $Global::halt_count = $total_jobs * $Global::halt_pct; } if($job->exitstatus() or $job->exitsignal()) { # Job failed $Global::exitstatus++; $Global::total_failed++; if($Global::halt_fail) { ::status("$Global::progname: This job failed:\n", $job->replaced(),"\n"); if($Global::halt_count <= $Global::total_failed) { # At least N jobs had failed if(not defined $Global::halt_exitstatus) { if($Global::halt_pct) { # --halt now,fail=X% or soon,fail=X% $Global::halt_exitstatus = ::ceil($Global::total_failed / $total_jobs * 100); } elsif($Global::halt_count) { # --halt now,fail=X or soon,fail=X $Global::halt_exitstatus = ::min($Global::total_failed,101); } if($Global::halt_count and $Global::halt_count == 1) { # --halt now,fail=1 or soon,fail=1 $Global::halt_exitstatus = $job->exitstatus(); } } ::debug("halt","Pct: ",$Global::halt_pct," count: ",$Global::halt_count,"\n"); if($Global::halt_when eq "soon" and scalar(keys %Global::running) > 0) { ::status ("$Global::progname: Starting no more jobs. ", "Waiting for ", scalar(keys %Global::running), " jobs to finish.\n"); $Global::start_no_new_jobs ||= 1; } return($Global::halt_when); } } } else { if($Global::halt_success) { ::debug("halt","Pct: ",$Global::halt_pct,"<=", " count: ",$Global::halt_count,"\n"); ::status("$Global::progname: This job succeeded:\n", $job->replaced(),"\n"); if($Global::halt_count <= $Global::total_completed-$Global::total_failed) { # At least N jobs had success # or at least N% had success $Global::halt_exitstatus = 0; if($Global::halt_when eq "soon" and scalar(keys %Global::running) > 0) { ::status ("$Global::progname: Starting no more jobs. 
", "Waiting for ", scalar(keys %Global::running), " jobs to finish.\n"); $Global::start_no_new_jobs ||= 1; } return($Global::halt_when); } } } return ""; } } package CommandLine; sub new { my $class = shift; my $seq = shift; my $commandref = shift; $commandref || die; my $arg_queue = shift; my $context_replace = shift; my $max_number_of_args = shift; # for -N and normal (-n1) my $transfer_files = shift; my $return_files = shift; my $replacecount_ref = shift; my $len_ref = shift; my %replacecount = %$replacecount_ref; my %len = %$len_ref; for (keys %$replacecount_ref) { # Total length of this replacement string {} replaced with all args $len{$_} = 0; } return bless { 'command' => $commandref, 'seq' => $seq, 'len' => \%len, 'arg_list' => [], 'arg_list_flat' => [], 'arg_list_flat_orig' => [undef], 'arg_queue' => $arg_queue, 'max_number_of_args' => $max_number_of_args, 'replacecount' => \%replacecount, 'context_replace' => $context_replace, 'transfer_files' => $transfer_files, 'return_files' => $return_files, 'replaced' => undef, }, ref($class) || $class; } sub seq { my $self = shift; return $self->{'seq'}; } sub set_seq { my $self = shift; $self->{'seq'} = shift; } { my $max_slot_number; sub slot { # Find the number of a free job slot and return it # Uses: # @Global::slots - list with free jobslots # Returns: # $jobslot = number of jobslot my $self = shift; if(not $self->{'slot'}) { if(not @Global::slots) { # $Global::max_slot_number will typically be $Global::max_jobs_running push @Global::slots, ++$max_slot_number; } $self->{'slot'} = shift @Global::slots; } return $self->{'slot'}; } } sub populate { # Add arguments from arg_queue until the number of arguments or # max line length is reached # Uses: # $Global::minimal_command_line_length # $opt::cat # $opt::fifo # $Global::JobQueue # $opt::m # $opt::X # $CommandLine::already_spread # $Global::max_jobs_running # Returns: N/A my $self = shift; my $next_arg; my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length(); if($opt::cat or $opt::fifo) { # Get the empty arg added by --pipepart (if any) $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); # $PARALLEL_TMP will point to a tempfile that will be used as {} $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> unget([Arg->new('$PARALLEL_TMP')]); } while (not $self->{'arg_queue'}->empty()) { $next_arg = $self->{'arg_queue'}->get(); if(not defined $next_arg) { next; } $self->push($next_arg); if($self->len() >= $max_len) { # Command length is now > max_length # If there are arguments: remove the last # If there are no arguments: Error # TODO stuff about -x opt_x if($self->number_of_args() > 1) { # There is something to work on $self->{'arg_queue'}->unget($self->pop()); last; } else { my $args = join(" ", map { $_->orig() } @$next_arg); ::error("Command line too long (". $self->len(). " >= ". $max_len. ") at input ". $self->{'arg_queue'}->arg_number(). ": ". ((length $args > 50) ? (substr($args,0,50))."..." 
: $args)); $self->{'arg_queue'}->unget($self->pop()); ::wait_and_exit(255); } } if(defined $self->{'max_number_of_args'}) { if($self->number_of_args() >= $self->{'max_number_of_args'}) { last; } } } if(($opt::m or $opt::X) and not $CommandLine::already_spread and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { # -m or -X and EOF => Spread the arguments over all jobslots # (unless they are already spread) $CommandLine::already_spread ||= 1; if($self->number_of_args() > 1) { $self->{'max_number_of_args'} = ::ceil($self->number_of_args()/$Global::max_jobs_running); $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = $self->{'max_number_of_args'}; $self->{'arg_queue'}->unget($self->pop_all()); while($self->number_of_args() < $self->{'max_number_of_args'}) { $self->push($self->{'arg_queue'}->get()); } } } if($opt::sql) { # Insert the V1..Vn for this $seq in SQL table instead of generating one $Global::sql->insert_records($self->seq(),$self->{'arg_list_flat_orig'}); } } sub push { # Add one or more records as arguments # Returns: N/A my $self = shift; my $record = shift; push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record; push @{$self->{'arg_list_flat'}}, @$record; push @{$self->{'arg_list'}}, $record; # Make @arg available for {= =} *Arg::arg = $self->{'arg_list_flat_orig'}; my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; for my $perlexpr (keys %{$self->{'replacecount'}}) { if($perlexpr =~ /^(\d+) /) { # Positional defined($record->[$1-1]) or next; $self->{'len'}{$perlexpr} += length $record->[$1-1]->replace($perlexpr,$quote_arg,$self); } else { for my $arg (@$record) { if(defined $arg) { $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self); } } } } } sub pop { # Remove last argument # Returns: # the last record my $self = shift; my $record = pop @{$self->{'arg_list'}}; # pop off arguments from @$record splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1; splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1; my $quote_arg = $Global::noquote ? 
0 : not $Global::quoting; for my $perlexpr (keys %{$self->{'replacecount'}}) { if($perlexpr =~ /^(\d+) /) { # Positional defined($record->[$1-1]) or next; $self->{'len'}{$perlexpr} -= length $record->[$1-1]->replace($perlexpr,$quote_arg,$self); } else { for my $arg (@$record) { if(defined $arg) { $self->{'len'}{$perlexpr} -= length $arg->replace($perlexpr,$quote_arg,$self); } } } } return $record; } sub pop_all { # Remove all arguments and zeros the length of replacement perlexpr # Returns: # all records my $self = shift; my @popped = @{$self->{'arg_list'}}; for my $perlexpr (keys %{$self->{'replacecount'}}) { $self->{'len'}{$perlexpr} = 0; } $self->{'arg_list'} = []; $self->{'arg_list_flat_orig'} = [undef]; $self->{'arg_list_flat'} = []; return @popped; } sub number_of_args { # The number of records # Returns: # number of records my $self = shift; # This is really the number of records return $#{$self->{'arg_list'}}+1; } sub number_of_recargs { # The number of args in records # Returns: # number of args records my $self = shift; my $sum = 0; my $nrec = scalar @{$self->{'arg_list'}}; if($nrec) { $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); } return $sum; } sub args_as_string { # Returns: # all unmodified arguments joined with ' ' (similar to {}) my $self = shift; return (join " ", map { $_->orig() } map { @$_ } @{$self->{'arg_list'}}); } sub args_as_dirname { # Returns: # all unmodified arguments joined with '/' (similar to {}) # \t \0 \\ and / are quoted as: \t \0 \\ \_ # If $Global::max_file_length: Keep subdirs < $Global::max_file_length my $self = shift; my @res = (); for my $rec_ref (@{$self->{'arg_list'}}) { # If headers are used, sort by them. # Otherwise keep the order from the command line. my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); for my $n (@header_indexes_sorted) { CORE::push(@res, $Global::input_source_header{$n}, map { my $s = $_; # \t \0 \\ and / are quoted as: \t \0 \\ \_ $s =~ s/\\/\\\\/g; $s =~ s/\t/\\t/g; $s =~ s/\0/\\0/g; $s =~ s:/:\\_:g; if($Global::max_file_length) { # Keep each subdir shorter than the longest # allowed file name $s = substr($s,0,$Global::max_file_length); } $s; } $rec_ref->[$n-1]->orig()); } } return join "/", @res; } sub header_indexes_sorted { # Sort headers first by number then by name. # E.g.: 1a 1b 11a 11b # Returns: # Indexes of %Global::input_source_header sorted my $max_col = shift; no warnings 'numeric'; for my $col (1 .. $max_col) { # Make sure the header is defined. If it is not: use column number if(not defined $Global::input_source_header{$col}) { $Global::input_source_header{$col} = $col; } } my @header_indexes_sorted = sort { # Sort headers numerically then asciibetically $Global::input_source_header{$a} <=> $Global::input_source_header{$b} or $Global::input_source_header{$a} cmp $Global::input_source_header{$b} } 1 .. 
$max_col; return @header_indexes_sorted; } sub len { # Uses: # $opt::shellquote # The length of the command line with args substituted my $self = shift; my $len = 0; # Add length of the original command with no args # Length of command w/ all replacement args removed $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; ::debug("length", "noncontext + command: $len\n"); my $recargs = $self->number_of_recargs(); if($self->{'context_replace'}) { # Context is duplicated for each arg $len += $recargs * $self->{'len'}{'context'}; for my $replstring (keys %{$self->{'replacecount'}}) { # If the replacements string is more than once: mulitply its length $len += $self->{'len'}{$replstring} * $self->{'replacecount'}{$replstring}; ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", $self->{'replacecount'}{$replstring}, "\n"); } # echo 11 22 33 44 55 66 77 88 99 1010 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 # 5 + ctxgrp*arg ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); # Add space between context groups $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); } else { # Each replacement string may occur several times # Add the length for each time $len += 1*$self->{'len'}{'context'}; ::debug("length", "context+noncontext + command: $len\n"); for my $replstring (keys %{$self->{'replacecount'}}) { # (space between regargs + length of replacement) # * number this replacement is used $len += ($recargs -1 + $self->{'len'}{$replstring}) * $self->{'replacecount'}{$replstring}; } } if($Global::quoting) { # Pessimistic length if -q is set # Worse than worst case: every char needs to be quoted with \ $len *= 2; } if($opt::shellquote) { # Pessimistic length if --shellquote is set # Worse than worst case: every char needs to be quoted with \ twice $len *= 4; } # If we are using --env, add the prefix for that, too. $len += $Global::envvarlen; return $len; } sub replaced { # Uses: # $Global::noquote # $Global::quoting # Returns: # $replaced = command with place holders replaced and prepended my $self = shift; if(not defined $self->{'replaced'}) { # Don't quote arguments if the input is the full command line my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg; $self->{'replaced'} = $self-> replace_placeholders($self->{'command'},$Global::quoting, $quote_arg); my $len = length $self->{'replaced'}; if ($len != $self->len()) { ::debug("length", $len, " != ", $self->len(), " ", $self->{'replaced'}, "\n"); } else { ::debug("length", $len, " == ", $self->len(), " ", $self->{'replaced'}, "\n"); } } return $self->{'replaced'}; } { my @target; my $context_replace; my $perl_expressions_as_re; my @arg; my %words_containing_replacement_strings; sub fish_out_words_containing_replacement_strings { if(not $words_containing_replacement_strings{$context_replace,@target}) { my %word; for (@target) { my $tt = $_; ::debug("replace", "Target: $tt"); # Command line template: # a{1}b{}c{}d # becomes: # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d # becomes: # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d # Input A B C (no context) becomes: # A B C => aAbA B CcA B Cd # Input A B C (context -X) becomes: # A B C => aAbAcAd aAbBcBd aAbCcCd if($context_replace) { while($tt =~ s/([^\s\257]* # before {= (?: \257< # {= [^\257]*? 
# The perl expression \257> # =} [^\s\257]* # after =} )+)/ /x) { # $1 = pre \257 perlexpr \257 post $word{"$1"} ||= 1; } } else { while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) { # $f = \257 perlexpr \257 $word{$1} ||= 1; } } } @{$words_containing_replacement_strings{$context_replace,@target}} = keys %word } return @{$words_containing_replacement_strings{$context_replace,@target}}; } sub replace_placeholders { # Replace foo{}bar with fooargbar # Uses: # @Arg::arg = arguments as strings to be use in {= =} # Input: # $targetref = command as shell words # $quote = should everything be quoted? # $quote_arg = should replaced arguments be quoted? # Returns: # @target with placeholders replaced my $self = shift; my $targetref = shift; my $quote = shift; my $quote_arg = shift; my %replace; # -X = context replace (fish_out_words_containing_replacement_strings) $context_replace = $self->{'context_replace'}; @target = @$targetref; ::debug("replace", "Replace @target\n"); if(not @target) { # @target is empty: Return empty array return @target; } # Make it possible to use $arg[2] in {= =} *Arg::arg = $self->{'arg_list_flat_orig'}; # Flat list: # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ] if(not @{$self->{'arg_list_flat'}}) { @{$self->{'arg_list_flat'}} = Arg->new(""); } my $argref = $self->{'arg_list_flat'}; # Number of arguments - used for positional arguments my $n = $#$argref+1; # $self is actually a CommandLine-object, # but it looks nice to be able to say {= $job->slot() =} my $job = $self; # Fish out the words that have replacement strings in them for my $word ( fish_out_words_containing_replacement_strings()) { # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF ::debug("replace", "Replacing in $word\n"); my $normal_replace; # for each arg: # replace replacement strings with replacement in the word value # push to replace word value $perl_expressions_as_re ||= join("|", map {s/^-?\d+//; "\Q$_\E"} keys %{$self->{'replacecount'}}); for my $arg (@$argref) { my $val = $word; # Replace {= perl expr =} with value for each arg $val =~ s{\257<(-?\d+)?($perl_expressions_as_re)\257>} { if($1) { # Positional replace # Find the relevant arg and replace it ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace $argref->[$1 > 0 ? $1-1 : $n+$1]-> replace($2,$quote_arg,$self) : ""); } else { # Normal replace $normal_replace ||= 1; ($arg ? $arg->replace($2,$quote_arg,$self) : ""); } }goxe; if($quote) { CORE::push(@{$replace{::shell_quote_scalar($word)}}, ::shell_quote_scalar($val)); } else { CORE::push(@{$replace{$word}}, $val); } # No normal replacements => only run once $normal_replace or last; } } @Arg::arg = (); if($quote) { @target = ::shell_quote(@target); } if(%replace) { # Substitute the replace strings with the replacement values # Must be sorted by length if a short word is a substring of a long word my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s } sort { length $b <=> length $a } keys %replace); for(@target) { s/($regexp)/join(" ",@{$replace{$1}})/ge; } } ::debug("replace", "Return @target\n"); return wantarray ? 
@target : "@target"; } } sub skip { # Skip this job my $self = shift; $self->{'skip'} = 1; } package CommandLineQueue; sub new { my $class = shift; my $commandref = shift; my $read_from = shift; my $context_replace = shift || 0; my $max_number_of_args = shift; my $transfer_files = shift; my $return_files = shift; my @unget = (); my ($count,$posrpl,$perlexpr); my ($replacecount_ref, $len_ref); my @command = @$commandref; my $dummy = ''; # If the first command starts with '-' it is probably an option if($command[0] =~ /^\s*(-\S+)/) { # Is this really a command in $PATH starting with '-'? my $cmd = $1; if(not ::which($cmd)) { ::error("Command ($cmd) starts with '-'. Is this a wrong option?"); ::wait_and_exit(255); } } # Replace replacement strings with {= perl expr =} # '{=' 'perlexpr' '=}' => '{= perlexpr =}' @command = merge_rpl_parts(@command); # Protect matching inside {= perl expr =} # by replacing {= and =} with \257< and \257> # in @command, --return and --tagstring (if used) for(@command,@$transfer_files,@$return_files, (defined $opt::tagstring ? $opt::tagstring : $dummy), (defined $opt::workdir ? $opt::workdir : $dummy)) { # Disallow \257 to avoid nested {= {= =} =} if(/\257/) { ::error("Command cannot contain the character \257. Use a function for that."); ::wait_and_exit(255); } # Needs to match rightmost left parens (Perl defaults to leftmost) # to deal with: {={==} while(s{([^\257]*) \Q$Global::parensleft\E ([^\257]*?) \Q$Global::parensright\E } {$1\257<$2\257>}gx) {} for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) { # Replace long --rpl's before short ones, as a short may be a # substring of a long: # --rpl '% s/a/b/' --rpl '%% s/b/a/' # Replace the shorthand string (--rpl) # with the {= perl expr =} # Avoid replacing inside existing {= perl expr =} while(s{((^|\257>)[^\257]*?) # Don't replace after \257 unless \257> \Q$rpl\E} {$1\257<$Global::rpl{$rpl}\257>}xg) { } # Do the same for the positional replacement strings # A bit harder as we have to put in the position number $posrpl = $rpl; if($posrpl =~ s/^\{//) { # Only do this if the shorthand starts with { while(s{((^|\257>)[^\257]*?) # Don't replace after \257 unless \257> \{(-?\d+)\Q$posrpl\E} {$1\257<$3 $Global::rpl{$rpl}\257>}xg) { } } } } # Add {} if no replacement strings in @command ($replacecount_ref, $len_ref, @command) = replacement_counts_and_lengths($transfer_files,$return_files,@command); if("@command" =~ /^[^ \t\n=]*\257</) { # Replacement string is (part of) the command (and not just # argument or variable definition V1={}) # E.g.
parallel {}, parallel my_{= s/_//=}, parallel {2} # Do not quote (Otherwise it will fail if the input contains spaces) $Global::noquote = 1; } return bless { 'unget' => \@unget, 'command' => \@command, 'replacecount' => $replacecount_ref, 'arg_queue' => RecordQueue->new($read_from,$opt::colsep), 'context_replace' => $context_replace, 'len' => $len_ref, 'max_number_of_args' => $max_number_of_args, 'size' => undef, 'transfer_files' => $transfer_files, 'return_files' => $return_files, 'seq' => 1, }, ref($class) || $class; } sub merge_rpl_parts { # '{=' 'perlexpr' '=}' => '{= perlexpr =}' # Input: # @in = the @command as given by the user # Uses: # $Global::parensleft # $Global::parensright # Returns: # @command with parts merged to keep {= and =} as one my @in = @_; my @out; my $l = quotemeta($Global::parensleft); my $r = quotemeta($Global::parensright); while(@in) { my $s = shift @in; $_ = $s; # Remove matching (rightmost) parens while(s/(.*)$l.*?$r/$1/o) {} if(/$l/o) { # Missing right parens while(@in) { $s .= " ".shift @in; $_ = $s; while(s/(.*)$l.*?$r/$1/o) {} if(not /$l/o) { last; } } } push @out, $s; } return @out; } sub replacement_counts_and_lengths { # Count the number of different replacement strings. # Find the lengths of context for context groups and non-context # groups. # If no {} found in @command: add it to @command # # Input: # \@transfer_files = array of filenames to transfer # \@return_files = array of filenames to return # @command = command template # Output: # \%replacecount, \%len, @command my $transfer_files = shift; my $return_files = shift; my @command = @_; my (%replacecount,%len); my $sum = 0; while($sum == 0) { # Count how many times each replacement string is used my @cmd = @command; my $contextlen = 0; my $noncontextlen = 0; my $contextgroups = 0; for my $c (@cmd) { while($c =~ s/ \257<([^\257]*?)\257> /\000/x) { # %replacecount = { "perlexpr" => number of times seen } # e.g. { "s/a/b/" => 2 } $replacecount{$1}++; $sum++; } # Measure the length of the context around the {= perl expr =} # Use the fact that {=...=} has been replaced with \000 above # So there is no need to deal with \257< while($c =~ s/ (\S*\000\S*) //x) { my $w = $1; $w =~ tr/\000//d; # Remove all \000's $contextlen += length($w); $contextgroups++; } # All {= perl expr =} have been removed: The rest is non-context $noncontextlen += length $c; } for(@$transfer_files,@$return_files) { my $t = $_; while($t =~ s/ \257<([^\257]*)\257> //x) { # %replacecount = { "perlexpr" => number of times seen } # e.g. { "$_++" => 2 } # But for transfer/return files we just need to mark it as seen $replacecount{$1} ||= 1; } } if($opt::tagstring) { my $t = $opt::tagstring; while($t =~ s/ \257<([^\257]*)\257> //x) { # %replacecount = { "perlexpr" => number of times seen } # e.g. { "$_++" => 2 } # But for tagstring we just need to mark it as seen $replacecount{$1} ||= 1; } } if($opt::workdir) { my $t = $opt::workdir; while($t =~ s/ \257<([^\257]*)\257> //x) { # %replacecount = { "perlexpr" => number of times seen } # e.g. { "$_++" => 2 } # But for workdir we just need to mark it as seen $replacecount{$1} ||= 1; } } if($opt::bar) { # If the command does not contain {} force it to be computed # as it is being used by --bar $replacecount{""} ||= 1; } $len{'context'} = 0+$contextlen; $len{'noncontext'} = $noncontextlen; $len{'contextgroups'} = $contextgroups; $len{'noncontextgroups'} = @cmd-$contextgroups; ::debug("length", "@command Context: ", $len{'context'}, " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, " NonCtxGrp: ",
$len{'noncontextgroups'}, "\n"); if($sum == 0) { if(not @command) { # Default command = {} @command = ("\257<\257>"); } elsif(($opt::pipe or $opt::pipepart) and not $opt::fifo and not $opt::cat) { # With --pipe / --pipepart you can have no replacement last; } else { # Append {} to the command if there are no {...}'s and no {=...=} push @command, ("\257<\257>"); } } } return(\%replacecount,\%len,@command); } sub get { my $self = shift; if(@{$self->{'unget'}}) { my $cmd_line = shift @{$self->{'unget'}}; return ($cmd_line); } else { my $cmd_line = CommandLine->new($self->seq(), $self->{'command'}, $self->{'arg_queue'}, $self->{'context_replace'}, $self->{'max_number_of_args'}, $self->{'transfer_files'}, $self->{'return_files'}, $self->{'replacecount'}, $self->{'len'}, ); $cmd_line->populate(); if($opt::sqlworker) { # Get the sequence number from the SQL table $cmd_line->set_seq($SQL::next_seq); } ::debug("init","cmd_line->number_of_args ", $cmd_line->number_of_args(), "\n"); if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) { if($cmd_line->replaced() eq "") { # Empty command - pipe requires a command ::error("--pipe/--pipepart must have a command to pipe into ". "(e.g. 'cat')."); ::wait_and_exit(255); } } else { if($cmd_line->number_of_args() == 0) { # We did not get more args - maybe at EOF string? return undef; } elsif($cmd_line->replaced() eq "") { # Empty command - get the next instead return $self->get(); } } $self->set_seq($self->seq()+1); return $cmd_line; } } sub unget { my $self = shift; unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty(); ::debug("run", "CommandLineQueue->empty $empty"); return $empty; } sub seq { my $self = shift; return $self->{'seq'}; } sub set_seq { my $self = shift; $self->{'seq'} = shift; } sub quote_args { my $self = shift; # If there is no command, emulate |bash return $self->{'command'}; } package Limits::Command; # Maximal command line length (for -m and -X) sub max_length { # Find the max_length of a command line and cache it # Returns: # number of chars on the longest command line allowed if(not $Limits::Command::line_max_len) { # Disk cache of max command line length my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname(); my $cached_limit; if(-e $len_cache) { open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache"); $cached_limit = <$fh>; close $fh; } else { $cached_limit = real_max_length(); # If $HOME is write protected: Do not fail mkdir($ENV{'HOME'} . "/.parallel"); mkdir($ENV{'HOME'} .
"/.parallel/tmp"); open(my $fh, ">", $len_cache); print $fh $cached_limit; close $fh; } $Limits::Command::line_max_len = tmux_length($cached_limit); if($opt::max_chars) { if($opt::max_chars * 2 <= $cached_limit) { # $opt::max_chars quoting causes the length to double $Limits::Command::line_max_len = $opt::max_chars * 2; } else { ::warning("Value for -s option should be < $cached_limit."); } } } return int($Limits::Command::line_max_len/2); } sub real_max_length { # Find the max_length of a command line # Returns: # The maximal command line length # Use an upper bound of 8 MB if the shell allows for infinitely long lengths my $upper = 8_000_000; my $len = 8; do { if($len > $upper) { return $len }; $len *= 16; } while (is_acceptable_command_line_length($len)); # Then search for the actual max length between 0 and upper bound return binary_find_max_length(int($len/16),$len); } sub binary_find_max_length { # Given a lower and upper bound find the max_length of a command line # Returns: # number of chars on the longest command line allowed my ($lower, $upper) = (@_); if($lower == $upper or $lower == $upper-1) { return $lower; } my $middle = int (($upper-$lower)/2 + $lower); ::debug("init", "Maxlen: $lower,$upper,$middle : "); if (is_acceptable_command_line_length($middle)) { return binary_find_max_length($middle,$upper); } else { return binary_find_max_length($lower,$middle); } } sub is_acceptable_command_line_length { # Test if a command line of this length can run # Returns: # 0 if the command line length is too long # 1 otherwise my $len = shift; local *STDERR; open (STDERR, ">", "/dev/null"); system "true "."x"x$len; close STDERR; ::debug("init", "$len=$? "); return not $?; } sub tmux_length { # If $opt::tmux set, find the limit for tmux # tmux 1.8 has a 2kB limit # tmux 1.9 has a 16kB limit # Input: # $len = maximal command line length # Returns: # $tmux_len = maximal length runnable in tmux my $len = shift; if($opt::tmux) { $ENV{'TMUX'} ||= "tmux"; if(not ::which($ENV{'TMUX'})) { ::error($ENV{'TMUX'}." not found in \$PATH."); ::wait_and_exit(255); } my @out; for my $l (1, 2020, 16320, 100000, $len) { my $tmpfile = ::tmpname("tms"); my $tmuxcmd = $ENV{'TMUX'}." -S $tmpfile new-session -d -n echo $l". ("x"x$l).
" && echo $l; rm -f $tmpfile"; push @out, ::qqx($tmuxcmd); unlink $tmpfile; } ::debug("tmux","tmux-length ",@out); chomp @out; # The arguments is given 3 times on the command line # and the wrapping is around 30 chars # (29 for tmux1.9, 33 for tmux1.8) my $tmux_len = (::max(@out)); $len = ::min($len,int($tmux_len/4-33)); ::debug("tmux","tmux-length ",$len); } return $len; } package RecordQueue; sub new { my $class = shift; my $fhs = shift; my $colsep = shift; my @unget = (); my $arg_sub_queue; if($opt::sqlworker) { # Open SQL table $arg_sub_queue = SQLRecordQueue->new(); } elsif($colsep) { # Open one file with colsep $arg_sub_queue = RecordColQueue->new($fhs); } else { # Open one or more files if multiple -a $arg_sub_queue = MultifileQueue->new($fhs); } return bless { 'unget' => \@unget, 'arg_number' => 0, 'arg_sub_queue' => $arg_sub_queue, }, ref($class) || $class; } sub get { # Returns: # reference to array of Arg-objects my $self = shift; if(@{$self->{'unget'}}) { $self->{'arg_number'}++; # Flush cached computed replacements in Arg-objects # To fix: parallel --bar echo {%} ::: a b c ::: d e f my $ret = shift @{$self->{'unget'}}; if($ret) { map { $_->flush_cache() } map { @$_ } $ret; } return $ret; } my $ret = $self->{'arg_sub_queue'}->get(); if(defined $Global::max_number_of_args and $Global::max_number_of_args == 0) { ::debug("run", "Read 1 but return 0 args\n"); # \0 => nothing (not the empty string) return [Arg->new("\0")]; } else { # Flush cached computed replacements in Arg-objects # To fix: parallel --bar echo {%} ::: a b c ::: d e f if($ret) { map { $_->flush_cache() } map { @$_ } $ret; } return $ret; } } sub unget { my $self = shift; ::debug("run", "RecordQueue-unget '@_'\n"); $self->{'arg_number'} -= @_; unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = not @{$self->{'unget'}}; $empty &&= $self->{'arg_sub_queue'}->empty(); ::debug("run", "RecordQueue->empty $empty"); return $empty; } sub arg_number { my $self = shift; return $self->{'arg_number'}; } package RecordColQueue; sub new { my $class = shift; my $fhs = shift; my @unget = (); my $arg_sub_queue = MultifileQueue->new($fhs); return bless { 'unget' => \@unget, 'arg_sub_queue' => $arg_sub_queue, }, ref($class) || $class; } sub get { # Returns: # reference to array of Arg-objects my $self = shift; if(@{$self->{'unget'}}) { return shift @{$self->{'unget'}}; } my $unget_ref = $self->{'unget'}; if($self->{'arg_sub_queue'}->empty()) { return undef; } my $in_record = $self->{'arg_sub_queue'}->get(); if(defined $in_record) { my @out_record = (); for my $arg (@$in_record) { ::debug("run", "RecordColQueue::arg $arg\n"); my $line = $arg->orig(); ::debug("run", "line='$line'\n"); if($line ne "") { for my $s (split /$opt::colsep/o, $line, -1) { push @out_record, Arg->new($s); } } else { push @out_record, Arg->new(""); } } return \@out_record; } else { return undef; } } sub unget { my $self = shift; ::debug("run", "RecordColQueue-unget '@_'\n"); unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty()); ::debug("run", "RecordColQueue->empty $empty"); return $empty; } package SQLRecordQueue; sub new { my $class = shift; my @unget = (); return bless { 'unget' => \@unget, }, ref($class) || $class; } sub get { # Returns: # reference to array of Arg-objects my $self = shift; if(@{$self->{'unget'}}) { return shift @{$self->{'unget'}}; } return $Global::sql->get_record(); } sub unget { my $self = shift; ::debug("run", 
"SQLRecordQueue-unget '@_'\n"); unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; if(@{$self->{'unget'}}) { return 0; } my $get = $self->get(); if(defined $get) { $self->unget($get); } my $empty = not $get; ::debug("run", "SQLRecordQueue->empty $empty"); return $empty; } package MultifileQueue; @Global::unget_argv=(); sub new { my $class = shift; my $fhs = shift; for my $fh (@$fhs) { if(-t $fh and -t ($Global::status_fd || *STDERR)) { ::warning("Input is read from the terminal.", "Only experts do this on purpose. ". "Press CTRL-D to exit."); } } return bless { 'unget' => \@Global::unget_argv, 'fhs' => $fhs, 'arg_matrix' => undef, }, ref($class) || $class; } sub get { my $self = shift; if($opt::xapply) { return $self->xapply_get(); } else { return $self->nest_get(); } } sub unget { my $self = shift; ::debug("run", "MultifileQueue-unget '@_'\n"); unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = (not @Global::unget_argv and not @{$self->{'unget'}}); for my $fh (@{$self->{'fhs'}}) { $empty &&= eof($fh); } ::debug("run", "MultifileQueue->empty $empty "); return $empty; } sub xapply_get { my $self = shift; if(@{$self->{'unget'}}) { return shift @{$self->{'unget'}}; } my @record = (); my $prepend; my $empty = 1; for my $fh (@{$self->{'fhs'}}) { my $arg = read_arg_from_fh($fh); if(defined $arg) { # Record $arg for recycling at end of file push @{$self->{'arg_matrix'}{$fh}}, $arg; push @record, $arg; $empty = 0; } else { ::debug("run", "EOA "); # End of file: Recycle arguments push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}}; # return last @{$args->{'args'}{$fh}}; push @record, @{$self->{'arg_matrix'}{$fh}}[-1]; } } if($empty) { return undef; } else { return \@record; } } sub nest_get { my $self = shift; if(@{$self->{'unget'}}) { return shift @{$self->{'unget'}}; } my @record = (); my $prepend; my $empty = 1; my $no_of_inputsources = $#{$self->{'fhs'}} + 1; if(not $self->{'arg_matrix'}) { # Initialize @arg_matrix with one arg from each file # read one line from each file my @first_arg_set; my $all_empty = 1; for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); if(defined $arg) { $all_empty = 0; } $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; } if($all_empty) { # All filehandles were at eof or eof-string return undef; } return [@first_arg_set]; } # Treat the case with one input source special. For multiple # input sources we need to remember all previously read values to # generate all combinations. But for one input source we can # forget the value after first use. 
if($no_of_inputsources == 1) { my $arg = read_arg_from_fh($self->{'fhs'}[0]); if(defined($arg)) { return [$arg]; } return undef; } for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { if(eof($self->{'fhs'}[$fhno])) { next; } else { # read one my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); defined($arg) || next; # If we just read an EOF string: Treat this as EOF my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; $self->{'arg_matrix'}[$fhno][$len] = $arg; # make all new combinations my @combarg = (); for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}]; } $combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry # map combinations # [ 1, 3, 7 ], [ 2, 4, 1 ] # => # [ m[0][1], m[1][3], m[3][7] ], [ m[0][2], m[1][4], m[2][1] ] my @mapped; for my $c (expand_combinations(@combarg)) { my @a; for my $n (0 .. $no_of_inputsources - 1 ) { push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; } push @mapped, \@a; } # append the mapped to the ungotten arguments push @{$self->{'unget'}}, @mapped; # get the first return shift @{$self->{'unget'}}; } } # all are eof or at EOF string; return from the unget queue return shift @{$self->{'unget'}}; } sub read_arg_from_fh { # Read one Arg from filehandle # Returns: # Arg-object with one read line # undef if end of file my $fh = shift; my $prepend; my $arg; do {{ # This makes 10% faster if(not ($arg = <$fh>)) { if(defined $prepend) { return Arg->new($prepend); } else { return undef; } } # Remove delimiter $arg =~ s:$/$::; if($Global::end_of_file_string and $arg eq $Global::end_of_file_string) { # Ignore the rest of input file close $fh; ::debug("run", "EOF-string ($arg) met\n"); if(defined $prepend) { return Arg->new($prepend); } else { return undef; } } if(defined $prepend) { $arg = $prepend.$arg; # For line continuation undef $prepend; } if($Global::ignore_empty) { if($arg =~ /^\s*$/) { redo; # Try the next line } } if($Global::max_lines) { if($arg =~ /\s$/) { # Trailing space => continued on next line $prepend = $arg; redo; } } }} while (1 == 0); # Dummy loop {{}} for redo if(defined $arg) { return Arg->new($arg); } else { ::die_bug("multiread arg undefined"); } } sub expand_combinations { # Input: # ([xmin,xmax], [ymin,ymax], ...) 
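# Editorial example (added comment; not in the upstream source):
#   expand_combinations([0,1],[0,1]) yields ([0,0],[0,1],[1,0],[1,1]),
#   i.e. every pair of indices within the given ranges; nest_get() above
#   maps such index tuples back onto entries of @arg_matrix to build the
#   argument combinations.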
# Returns: ([x,y,...],[x,y,...]) # where xmin <= x <= xmax and ymin <= y <= ymax my $minmax_ref = shift; my $xmin = $$minmax_ref[0]; my $xmax = $$minmax_ref[1]; my @p; if(@_) { # If there are more columns: Compute those recursively my @rest = expand_combinations(@_); for(my $x = $xmin; $x <= $xmax; $x++) { push @p, map { [$x, @$_] } @rest; } } else { for(my $x = $xmin; $x <= $xmax; $x++) { push @p, [$x]; } } return @p; } package Arg; sub new { my $class = shift; my $orig = shift; my @hostgroups; if($opt::hostgroups) { if($orig =~ s:@(.+)::) { # We found hostgroups on the arg @hostgroups = split(/\+/, $1); if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { ::warning("No such hostgroup (@hostgroups)."); @hostgroups = (keys %Global::hostgroups); } } else { @hostgroups = (keys %Global::hostgroups); } } return bless { 'orig' => $orig, 'hostgroups' => \@hostgroups, }, ref($class) || $class; } sub Q { # Q alias for ::shell_quote_scalar # Run shell_quote_scalar once to set the reference to the sub my $a = ::shell_quote_scalar(@_); *Q = \&::shell_quote_scalar; return $a; } sub total_jobs { return $Global::JobQueue->total_jobs() } { my %perleval; sub replace { # Calculates the corresponding value for a given perl expression # Returns: # The calculated string (quoted if asked for) my $self = shift; my $perlexpr = shift; # E.g. $_=$_ or s/.gz// my $quote = (shift) ? 1 : 0; # should the string be quoted? # This is actually a CommandLine-object, # but it looks nice to be able to say {= $job->slot() =} my $job = shift; $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace if(not $self->{'cache'}{$perlexpr}) { # Only compute the value once # Use $_ as the variable to change local $_; if($Global::trim eq "n") { $_ = $self->{'orig'}; } else { # Trim the input $_ = trim_of($self->{'orig'}); } ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); if(not $perleval{$perlexpr}) { # Make an anonymous function of the $perlexpr # And more importantly: Compile it only once if($perleval{$perlexpr} = eval('sub { no strict; no warnings; my $job = shift; '. $perlexpr.' }')) { # All is good } else { # The eval failed. Maybe $perlexpr is invalid perl? ::error("Cannot use $perlexpr: $@"); ::wait_and_exit(255); } } # Execute the function $perleval{$perlexpr}->($job); $self->{'cache'}{$perlexpr} = $_; } # Return the value quoted if needed return($quote ? ::shell_quote_scalar($self->{'cache'}{$perlexpr}) : $self->{'cache'}{$perlexpr}); } } sub flush_cache { # Flush cache of computed values my $self = shift; $self->{'cache'} = undef; } sub orig { my $self = shift; return $self->{'orig'}; } sub trim_of { # Removes white space as specifed by --trim: # n = nothing # l = start # r = end # lr|rl = both # Returns: # string with white space removed as needed my @strings = map { defined $_ ? $_ : "" } (@_); my $arg; if($Global::trim eq "n") { # skip } elsif($Global::trim eq "l") { for my $arg (@strings) { $arg =~ s/^\s+//; } } elsif($Global::trim eq "r") { for my $arg (@strings) { $arg =~ s/\s+$//; } } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } } else { ::error("--trim must be one of: r l rl lr."); ::wait_and_exit(255); } return wantarray ? 
@strings : "@strings"; } package TimeoutQueue; sub new { my $class = shift; my $delta_time = shift; my ($pct); if($delta_time =~ /(\d+(\.\d+)?)%/) { # Timeout in percent $pct = $1/100; $delta_time = 1_000_000; } return bless { 'queue' => [], 'delta_time' => $delta_time, 'pct' => $pct, 'remedian_idx' => 0, 'remedian_arr' => [], 'remedian' => undef, }, ref($class) || $class; } sub delta_time { my $self = shift; return $self->{'delta_time'}; } sub set_delta_time { my $self = shift; $self->{'delta_time'} = shift; } sub remedian { my $self = shift; return $self->{'remedian'}; } sub set_remedian { # Set median of the last 999^3 (=997002999) values using Remedian # # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A # robust averaging method for large data sets." Journal of the # American Statistical Association 85.409 (1990): 97-104. my $self = shift; my $val = shift; my $i = $self->{'remedian_idx'}++; my $rref = $self->{'remedian_arr'}; $rref->[0][$i%999] = $val; $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; } sub update_median_runtime { # Update delta_time based on runtime of finished job if timeout is # a percentage my $self = shift; my $runtime = shift; if($self->{'pct'}) { $self->set_remedian($runtime); $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); ::debug("run", "Timeout: $self->{'delta_time'}s "); } } sub process_timeouts { # Check if there was a timeout my $self = shift; # $self->{'queue'} is sorted by start time while (@{$self->{'queue'}}) { my $job = $self->{'queue'}[0]; if($job->endtime()) { # Job already finished. No need to timeout the job # This could be because of --keep-order shift @{$self->{'queue'}}; } elsif($job->is_timedout($self->{'delta_time'})) { # Need to shift off queue before kill # because kill calls usleep that calls process_timeouts shift @{$self->{'queue'}}; $job->kill(); } else { # Because they are sorted by start time the rest are later last; } } } sub insert { my $self = shift; my $in = shift; push @{$self->{'queue'}}, $in; } package SQL; sub new { my $class = shift; my $dburl = shift; $Global::use{"DBI"} ||= eval "use DBI; 1;"; my %options = parse_dburl(get_alias($dburl)); my %driveralias = ("sqlite" => "SQLite", "sqlite3" => "SQLite", "pg" => "Pg", "postgres" => "Pg", "postgresql" => "Pg", "csv" => "CSV", "oracle" => "Oracle", "ora" => "Oracle"); my $driver = $driveralias{$options{'databasedriver'}} || $options{'databasedriver'}; my $database = $options{'database'}; my $host = $options{'host'} ? ";host=".$options{'host'} : ""; my $port = $options{'port'} ? ";port=".$options{'port'} : ""; my $dsn = "DBI:$driver:dbname=$database$host$port"; my $userid = $options{'user'}; my $password = $options{'password'};; my $dbh = DBI->connect($dsn, $userid, $password, { RaiseError => 1, AutoInactiveDestroy => 1 }) or die $DBI::errstr; return bless { 'dbh' => $dbh, 'driver' => $driver, 'max_number_of_args' => undef, 'table' => $options{'table'}, }, ref($class) || $class; } sub get_alias { my $alias = shift; $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql: if ($alias !~ /^:/) { return $alias; } # Find the alias my $path; if (-l $0) { ($path) = readlink($0) =~ m|^(.*)/|; } else { ($path) = $0 =~ m|^(.*)/|; } my @deprecated = ("$ENV{HOME}/.dburl.aliases", "$path/dburl.aliases", "$path/dburl.aliases.dist"); for (@deprecated) { if(-r $_) { ::warning("$_ is deprecated. ". 
"Use .sql/aliases instead (read man sql)."); } } my @urlalias=(); check_permissions("$ENV{HOME}/.sql/aliases"); check_permissions("$ENV{HOME}/.dburl.aliases"); my @search = ("$ENV{HOME}/.sql/aliases", "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases", "$path/dburl.aliases", "$path/dburl.aliases.dist"); for my $alias_file (@search) { if(-r $alias_file) { push @urlalias, `cat "$alias_file"`; } } my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/; # If we saw this before: we have an alias loop if(grep {$_ eq $alias_part } @Private::seen_aliases) { ::error("$alias_part is a cyclic alias."); exit -1; } else { push @Private::seen_aliases, $alias_part; } my $dburl; for (@urlalias) { /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; } } if($dburl) { return get_alias($dburl.$rest); } else { Usage("$alias is not defined in @search"); exit(-1); } } sub check_permissions { my $file = shift; if(-e $file) { if(not -o $file) { my $username = (getpwuid($<))[0]; ::warning("$file should be owned by $username: ". "chown $username $file"); } my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat($file); if($mode & 077) { my $username = (getpwuid($<))[0]; ::warning("$file should be only be readable by $username: ". "chmod 600 $file"); } } } sub parse_dburl { my $url = shift; my %options = (); # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?sql query]] if($url=~m!(?:sql:)? # You can prefix with 'sql:' ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)| (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1) (?: ([^:@/][^:@]*|) # Username ($2) (?: :([^@]*) # Password ($3) )? @)? ([^:/]*)? # Hostname ($4) (?: : ([^/]*)? # Port ($5) )? (?: / ([^/?]*)? # Database ($6) )? (?: / ([^?]*)? # Table ($7) )? (?: \? (.*)? # Query ($8) )? !ix) { $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1))); $options{user} = ::undef_if_empty(uri_unescape($2)); $options{password} = ::undef_if_empty(uri_unescape($3)); $options{host} = ::undef_if_empty(uri_unescape($4)); $options{port} = ::undef_if_empty(uri_unescape($5)); $options{database} = ::undef_if_empty(uri_unescape($6)); $options{table} = ::undef_if_empty(uri_unescape($7)); $options{query} = ::undef_if_empty(uri_unescape($8)); ::debug("sql","dburl $url\n"); ::debug("sql","databasedriver ",$options{databasedriver}, " user ", $options{user}, " password ", $options{password}, " host ", $options{host}, " port ", $options{port}, " database ", $options{database}, " table ",$options{table}," query ",$options{query}, "\n"); } else { ::error("$url is not a valid DBURL"); exit 255; } return %options; } sub uri_unescape { # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm # to avoid depending on URI::Escape # This section is (C) Gisle Aas. 
# Note from RFC1630: "Sequences which start with a percent sign # but are not followed by two hexadecimal characters are reserved # for future extension" my $str = shift; if (@_ && wantarray) { # not executed for the common case of a single argument my @str = ($str, @_); # need to copy foreach (@str) { s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; } return @str; } $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; $str; } sub run { my $self = shift; my $stmt = shift; my @retval; my $dbh = $self->{'dbh'}; ::debug("sql","$opt::sql$opt::sqlworker run $stmt\n"); # Execute with the rest of the args - if any my $rv; my $sth; my $lockretry = 0; while($lockretry < 10) { $sth = $dbh->prepare($stmt); if($rv = $sth->execute(@_)) { last; } else { if($DBI::errstr =~ /locked/) { ::debug("sql","Lock retry: $lockretry"); $lockretry++; } else { ::error($DBI::errstr); } } } if($rv < 0){ print $DBI::errstr; } return $sth; } sub get { my $self = shift; my $sth = $self->run(@_); my @retval; while(1) { my @row = $sth->fetchrow_array(); @row or last; push @retval, \@row; } return \@retval; } sub table { my $self = shift; return $self->{'table'}; } sub update { my $self = shift; my $stmt = shift; my $table = $self->table(); $self->run("UPDATE $table $stmt",@_); } sub max_number_of_args { # Maximal number of args for this table my $self = shift; if(not $self->{'max_number_of_args'}) { # Read the number of args from the SQL table my $table = $self->table(); my $v = $self->get("SELECT * FROM $table LIMIT 1;"); my @reserved_columns = qw(Seq Host Starttime JobRuntime Send Receive Exitval _Signal Command Stdout Stderr); if(not $v) { ::error("$table contains no records"); } # Count the number of Vx columns $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns; } return $self->{'max_number_of_args'}; } sub set_max_number_of_args { my $self = shift; $self->{'max_number_of_args'} = shift; } sub create_table { my $self = shift; my $max_number_of_args = shift; $self->set_max_number_of_args($max_number_of_args); my $table = $self->table(); $self->run(qq(DROP TABLE IF EXISTS $table;)); # BIGINT and TEXT are not supported in these databases or are too small my %vartype = ( "Oracle" => { "BIGINT" => "NUMBER(19,0)", "TEXT" => "CLOB", }, "mysql" => { "TEXT" => "LONGTEXT", }, ); my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT"; my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT"; my $FLOAT = "FLOAT(44)"; my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args()); $self->run(qq{CREATE TABLE $table (Seq $BIGINT, Host $TEXT, Starttime $FLOAT, JobRuntime $FLOAT, Send $BIGINT, Receive $BIGINT, Exitval $BIGINT, _Signal $BIGINT, Command $TEXT,}. $v_def. qq{Stdout $TEXT, Stderr $TEXT);}); } sub insert_records { my $self = shift; my $seq = shift; my $record_ref = shift; my $table = $self->table(); my $v_cols = join ",", map { "V$_" } (1..$self->max_number_of_args()); # Two extra value due to $seq, Exitval my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+2); $self->run("INSERT INTO $table (Seq,Exitval,$v_cols) ". "VALUES ($v_vals);", $seq, -1000, @$record_ref[1..$#$record_ref]); } sub get_record { my $self = shift; my @retval; my $table = $self->table(); my $v_cols = join ",", map { "V$_" } (1..$self->max_number_of_args()); my $v = $self->get("SELECT Seq, $v_cols FROM $table ". 
"WHERE Exitval = -1000 ORDER BY Seq LIMIT 1;"); if($v->[0]) { my $val_ref = $v->[0]; # Mark record as taken my $seq = shift @$val_ref; # Save the sequence number to use when running the job $SQL::next_seq = $seq; $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220); for (@$val_ref) { push @retval, Arg->new($_); } } if(@retval) { return \@retval; } else { return undef; } } sub finished { # Check if there are any jobs left in the SQL table that do not # have a "real" exitval my $self = shift; my $table = $self->table(); my $rv = $self->get("select Seq,Exitval from $table where Exitval <= -1000 limit 1"); return not $rv->[0]; } package Semaphore; # This package provides a counting semaphore # # If a process dies without releasing the semaphore the next process # that needs that entry will clean up dead semaphores # # The semaphores are stored in ~/.parallel/semaphores/id-<name> Each # file in ~/.parallel/semaphores/id-<name>/ is the process ID of the # process holding the entry. If the process dies, the entry can be # taken by another process. sub new { my $class = shift; my $id = shift; my $count = shift; $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex $id = "id-".$id; # To distinguish it from a process id my $parallel_dir = $ENV{'HOME'}."/.parallel"; -d $parallel_dir or ::mkdir_or_die($parallel_dir); my $parallel_locks = $parallel_dir."/semaphores"; -d $parallel_locks or ::mkdir_or_die($parallel_locks); my $lockdir = "$parallel_locks/$id"; my $lockfile = $lockdir.".lock"; if($count < 1) { ::die_bug("semaphore-count: $count"); } return bless { 'lockfile' => $lockfile, 'lockfh' => Symbol::gensym(), 'lockdir' => $lockdir, 'id' => $id, 'idfile' => $lockdir."/".$id, 'pid' => $$, 'pidfile' => $lockdir."/".$$.'@'.::hostname(), 'count' => $count + 1 # nlinks returns a link for the 'id-' as well }, ref($class) || $class; } sub remove_dead_locks { my $self = shift; my $lockdir = $self->{'lockdir'}; for my $d (glob "$lockdir/*") { $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; my ($pid, $host) = ($1, $2); if($host eq ::hostname()) { if(not kill 0, $pid) { ::debug("sem", "Dead: $d\n"); unlink $d; } else { ::debug("sem", "Alive: $d\n"); } } } } sub acquire { my $self = shift; my $sleep = 1; # 1 ms my $start_time = time; while(1) { # Can we get a lock? $self->atomic_link_if_count_less_than() and last; $self->remove_dead_locks(); # Retry slower and slower up to 1 second $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); # Random to avoid every sleeping job waking up at the same time ::usleep(rand()*$sleep); if($opt::semaphoretimeout) { if($opt::semaphoretimeout > 0 and time - $start_time > $opt::semaphoretimeout) { # Timeout: Take the semaphore anyway ::warning("Semaphore timed out. Stealing the semaphore."); if(not -e $self->{'idfile'}) { open (my $fh, ">", $self->{'idfile'}) or ::die_bug("timeout_write_idfile: $self->{'idfile'}"); close $fh; } link $self->{'idfile'}, $self->{'pidfile'}; last; } if($opt::semaphoretimeout < 0 and time - $start_time > -$opt::semaphoretimeout) { # Timeout: Exit ::warning("Semaphore timed out. 
Exiting."); exit(1); last; } } } ::debug("sem", "acquired $self->{'pid'}\n"); } sub release { my $self = shift; unlink $self->{'pidfile'}; if($self->nlinks() == 1) { # This is the last link, so atomic cleanup $self->lock(); if($self->nlinks() == 1) { unlink $self->{'idfile'}; rmdir $self->{'lockdir'}; } $self->unlock(); } ::debug("run", "released $self->{'pid'}\n"); } sub pid_change { # This should do what release()+acquire() would do without having # to re-acquire the semaphore my $self = shift; my $old_pidfile = $self->{'pidfile'}; $self->{'pid'} = $$; $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname(); my $retval = link $self->{'idfile'}, $self->{'pidfile'}; ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); unlink $old_pidfile; } sub atomic_link_if_count_less_than { # Link $file1 to $file2 if nlinks to $file1 < $count my $self = shift; my $retval = 0; $self->lock(); my $nlinks = $self->nlinks(); ::debug("sem","$nlinks<$self->{'count'} "); if($nlinks < $self->{'count'}) { -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'}); if(not -e $self->{'idfile'}) { open (my $fh, ">", $self->{'idfile'}) or ::die_bug("write_idfile: $self->{'idfile'}"); close $fh; } $retval = link $self->{'idfile'}, $self->{'pidfile'}; ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); } $self->unlock(); ::debug("sem", "atomic $retval"); return $retval; } sub nlinks { my $self = shift; if(-e $self->{'idfile'}) { return (stat(_))[3]; } else { return 0; } } sub lock { my $self = shift; my $sleep = 100; # 100 ms my $total_sleep = 0; $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; my $locked = 0; while(not $locked) { if(tell($self->{'lockfh'}) == -1) { # File not open open($self->{'lockfh'}, ">", $self->{'lockfile'}) or ::debug("run", "Cannot open $self->{'lockfile'}"); } if($self->{'lockfh'}) { # File is open chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { # The file is locked: No need to retry $locked = 1; last; } else { if ($! =~ m/Function not implemented/) { ::warning("flock: $!", "Will wait for a random while."); ::usleep(rand(5000)); # File cannot be locked: No need to retry $locked = 2; last; } } } # Locking failed in first round # Sleep and try again $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); # Random to avoid every sleeping job waking up at the same time ::usleep(rand()*$sleep); $total_sleep += $sleep; if($opt::semaphoretimeout) { if($opt::semaphoretimeout > 0 and $total_sleep/1000 > $opt::semaphoretimeout) { # Timeout: Take the semaphore anyway ::warning("Semaphore timed out. Taking the semaphore."); $locked = 3; last; } if($opt::semaphoretimeout < 0 and $total_sleep/1000 > -$opt::semaphoretimeout) { # Timeout: Exit ::warning("Semaphore timed out. Exiting."); $locked = 4; last; } } else { if($total_sleep/1000 > 30) { ::warning("Semaphore stuck for 30 seconds. Consider using --semaphoretimeout."); } } } ::debug("run", "locked $self->{'lockfile'}"); } sub unlock { my $self = shift; unlink $self->{'lockfile'}; close $self->{'lockfh'}; ::debug("run", "unlocked\n"); } # Keep perl -w happy $opt::x = $Semaphore::timeout = $Semaphore::wait = $Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg;
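# Editorial usage sketch (added comment; not part of the upstream source).
# The Semaphore package above backs 'parallel --semaphore', typically also
# available as the 'sem' shorthand.  A counting semaphore with two slots
# ('mylock' is an arbitrary id) could be exercised like this:
#
#   sem --id mylock -j2 'sleep 10; echo one'    # starts immediately
#   sem --id mylock -j2 'sleep 10; echo two'    # starts immediately
#   sem --id mylock -j2 'sleep 10; echo three'  # blocks until a slot frees up
#   sem --id mylock --wait                      # wait for all three to finish
#
# The locks live in ~/.parallel/semaphores/id-mylock/ as one hard link per
# holder named <pid>@<hostname>; remove_dead_locks() above reclaims entries
# whose owning process no longer exists.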