#!/bin/sh
#
# $NetBSD: extract,v 1.23 2024/04/29 08:40:04 jperkin Exp $
#
# Copyright (c) 2006 The NetBSD Foundation, Inc.
# All rights reserved.
#
# This code is derived from software contributed to The NetBSD Foundation
# by Johnny C. Lam.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

######################################################################
#
# NAME
#	extract -- extract distfile, regardless of format
#
# SYNOPSIS
#	extract [options] distfile [file ...]
#
# DESCRIPTION
#	extract will unpack the contents of the named distfile into the
#	current working directory.  If any optional files are specified then
#	only they will be extracted from the distfile, provided that the
#	underlying tool supports this ability.  If the distfile's file
#	extension doesn't match any known archive format's, then the
#	distfile is simply copied into the current working directory.  If
#	"-" is given as the distfile, then standard input is used as the
#	contents of the archive, provided that the underlying tool supports
#	this ability.
#
# OPTIONS
#	-c format	Force interpretation of the distfile's compression
#			format to be the specified format.  Valid formats
#			are: gzip, bzip, compress, 7z, lzip, lzma, xz, and
#			none.
#
#	-d dir		Extract the files into the specified dir instead
#			of the current working directory.  If the directory
#			doesn't exist, then it is created along with any
#			intermediate directories using the current umask.
#
#	-f format	Force interpretation of the distfile's archive
#			format to be the specified format.
#
#	-t tarprog	This specifies the tool to use to extract tar/ustar
#			archives, and may be either "tar" or "pax", or the
#			full path to the program.
#
#	-X excludefile	excludefile is a list of file patterns to exclude
#			from extraction.  If the -X option is specified
#			then any optional files listed on the command line
#			are ignored.
#
#	-x		This causes the optional files listed on the
#			command line to be excluded from extraction,
#			provided the underlying tool supports this
#			ability.
#
# ENVIRONMENT
#	EXTRACT_OPTS_BIN
#	EXTRACT_OPTS_LHA
#	EXTRACT_OPTS_PAX
#	EXTRACT_OPTS_RAR
#	EXTRACT_OPTS_RPM
#	EXTRACT_OPTS_TAR
#	EXTRACT_OPTS_ZIP
#	EXTRACT_OPTS_ZSTD
#	EXTRACT_OPTS_ZOO
#		These variables set additional arguments passed to the
#		underlying extraction tool to unpack their respective
#		archive formats.
#
######################################################################

set -e		# exit on errors
set -u		# treat undefined variables as errors

# Default command for each external tool.  ":=" assigns the default only
# when the variable is unset or empty, so every one of these may be
# overridden from the environment.  Because of "set -u" above, every tool
# variable expanded later in the script must receive a default here.
: ${BZCAT:=bzcat}
: ${CAT:=cat}
: ${CP:=cp}
: ${ECHO:=echo}
: ${GEM:="gem unpack"}
: ${GZCAT:="gzip -cd"}
: ${LHA:=lha}
: ${LZCAT:="lzip -cd"}
: ${MKDIR:=mkdir}
: ${PAX:=pax}
: ${RM:=rm}
: ${RPM2PKG:=rpm2pkg}
: ${SH:=sh}
: ${TAR:=tar}
: ${TEST:=test}
: ${UNRAR:=unrar}
: ${UNZIP_CMD:=unzip}
: ${UNZOO:=unzoo}
# Used for both the xz and lzma compression formats.
: ${XZCAT:="xz -cd"}
: ${P7ZA:="7za x"}
: ${P7ZA_SO:="7za x -so"}
: ${ZSTD:="zstd"}

# Directory in which temporary files are created.
: ${TMPDIR:=/tmp}

# Name this script was invoked as, with any leading directory components
# stripped; used as the prefix of every diagnostic.
self="${0##*/}"

# usage
#	Write the one-line command synopsis to standard error.
usage() {
	${ECHO} "usage: $self [-c format] [-d dir] [-f format] [-t tarprog] [-X excludefile | -x] distfile [file ...]" >&2
}

# Option state, initialized to the values in effect when the corresponding
# option is not given on the command line.
exclude=no		# "yes" when the file operands are to be excluded (-x)
exclude_file=		# file of patterns to exclude (-X)
exclude_flag=		# filled in later by the extraction code
extract_dir=.		# directory to extract into (-d)
extract_using=tar	# program for tar archives (-t)
format=			# forced archive format (-f); empty means "derive"
cformat=		# forced compression format (-c); empty means "derive"

# Process optional arguments
while ${TEST} $# -gt 0; do
	case "$1" in
	-[cdftX])
		# These options take a value.  Diagnose a missing value
		# here instead of letting "set -u" abort the script on the
		# unset "$2" with a message that does not name the option.
		if ${TEST} $# -lt 2; then
			${ECHO} 1>&2 "$self: option requires an argument -- ${1#-}"
			usage
			exit 1
		fi
		case "$1" in
		-c)	cformat="$2" ;;
		-d)	extract_dir="$2" ;;
		-f)	format="$2" ;;
		-t)	extract_using="$2" ;;
		-X)	exclude_file="$2" ;;
		esac
		shift 2
		;;
	-x)	exclude=yes; shift ;;
	--)	shift; break ;;
	-?*)	${ECHO} 1>&2 "$self: unknown option -- ${1#-}"
		usage
		exit 1
		;;
	# First non-option word (this includes a lone "-"): stop parsing.
	*)	break ;;
	esac
done

# Choose the tar-style program.  An absolute path ending in "tar" or "pax"
# is used verbatim, any other name ending in "pax" selects ${PAX}, and
# everything else selects ${TAR}.
tarprog="${TAR}"
case "$extract_using" in
/*tar|/*pax)	tarprog="$extract_using" ;;
*pax)		tarprog="${PAX}" ;;
esac

# If an exclude file was named it must exist.  Resolve it to an absolute
# path while we are still in the invoking directory: the file is only read
# after the working directory has been changed to the extraction
# directory, where a relative name would no longer resolve.
if ${TEST} -n "$exclude_file"; then
	if ${TEST} ! -f "$exclude_file"; then
		${ECHO} 1>&2 "$self: exclude file missing: $exclude_file"
		exit 1
	fi
	case "$exclude_file" in
	/*)	;;
	*)	exclude_file=`exec pwd`/"$exclude_file" ;;
	esac
fi

# Process required arguments
${TEST} $# -gt 0 || { usage; exit 1; }
distfile="$1"; shift

# Make distfile an absolute path, because we will change the current
# directory soon.  A distfile of "-" names standard input rather than a
# file, so it is left untouched; prefixing it with the current directory
# would turn it into the name of a (nonexistent) file called "-".
case "$distfile" in
/*|-)	;;
*)	distfile=`exec pwd`/"$distfile"
	;;
esac

# Derive the compression format of the archive based on the file extension.
# A format forced with -c takes precedence over the derived one.
case "$distfile" in
*.gz|*.tgz|*.z|*.crate)		_cformat=gzip ;;
*.7z)				_cformat=7z ;;
*.bz2|*.tbz|*.tbz2|*.bz)	_cformat=bzip ;;
*.lz)				_cformat=lzip ;;
*.lzma)				_cformat=lzma ;;
*.xz|*.txz)			_cformat=xz ;;
*.zst)				_cformat=zstd ;;
*.Z)				_cformat=compress ;;
*)				_cformat=none ;;
esac
${TEST} -n "$cformat" || cformat="$_cformat"

# Derive the command to decompress the file and write the contents to
# stdout, based on the compression format.  An unknown format is treated
# like "none": the file is passed through unchanged.
#
case "$cformat" in
gzip|compress)	decompress_cat="${GZCAT}" ;;
7z)		decompress_cat="${P7ZA_SO}" ;;
bzip)		decompress_cat="${BZCAT}" ;;
lzip)		decompress_cat="${LZCAT}" ;;
lzma)		decompress_cat="${XZCAT}" ;;
xz)		decompress_cat="${XZCAT}" ;;
# Without this, a zstd-compressed tar archive would be handed to the tar
# program still compressed.
zstd)		decompress_cat="${ZSTD} -cd" ;;
none)		decompress_cat="${CAT}" ;;
*)		decompress_cat="${CAT}" ;;
esac

# Classify the archive by the distfile's suffix.  Patterns are tried from
# top to bottom, so compound suffixes such as ".tar.gz" or ".cpio.gz" are
# listed ahead of the bare compression suffixes they end in.
case "$distfile" in
*.tar|*.tar.gz|*.tar.bz2|*.tar.bz|*.tar.xz|*.tar.lz|*.tar.lzma)
		_format=tar ;;
*.tar.Z|*.tar.z|*.tar.7z|*.tar.zst|*-tar.gz|*_tar.gz)
		_format=tar ;;
*.tgz|*.tbz|*.tbz2|*.txz|*.crate)
		_format=tar ;;
*.cpio|*.cpio.gz|*.cpio.bz2)
		_format=cpio ;;
*.shar|*.shar.gz|*.shar.bz2|*.shar.Z|*.shr|*.shr.gz|*.shr.bz2|*.shr.Z)
		_format=shar ;;
*.zip|*.ZIP)
		_format=zip ;;
*.lha|*.lzh)
		_format=lha ;;
*.gz|*.bz2|*.Z|*.z)
		_format=compressed ;;
*.zoo)		_format=zoo ;;
*.rar)		_format=rar ;;
*.rpm)		_format=rpm ;;
*.bin|*.sh)	_format=jre-bin ;;
*.gem)		_format=gem ;;
*.7z)		_format=7z ;;
*.zst)		_format=zstd ;;
*)		_format=none ;;
esac
# A format forced with -f wins over the one derived from the suffix.
${TEST} -n "$format" || format="$_format"

# When the distfile is "-", only the tar and shar formats are accepted;
# any other format is rejected with an error.
if ${TEST} "$distfile" = "-"; then
	case "$format" in
	tar|shar)
		;;
	*)
		${ECHO} 1>&2 "$self: archive format cannot be given on standard input -- $format"
		exit 1
		;;
	esac
fi

# Create the extraction directory, along with any missing parents, when it
# does not exist yet, then make it the working directory for the rest of
# the script.
if ${TEST} -d "$extract_dir"; then
	:
else
	${MKDIR} -p "$extract_dir"
fi
cd "$extract_dir"

# Use the correct tool and extraction procedure to perform the extraction
# based on the archive format.
#
# Throughout, ${1+"$@"} expands to the remaining positional parameters
# (the optional file operands), or to nothing at all when there are none.
# "set -- dummy ...; shift" replaces the positional parameters with the
# words that follow "dummy"; the dummy/shift pair presumably guards against
# shells that mishandle "set --" with an empty list -- TODO confirm.
#
case "$format" in
tar)
	case "$extract_using" in
	*pax)
		: ${EXTRACT_OPTS_PAX=}
		case "$extract_using" in
		/*)	paxprog="$extract_using" ;;
		*)	paxprog="${PAX}" ;;
		esac
		# An exclude file replaces the file operands with its
		# (word-split) contents and turns on exclusion.
		if ${TEST} -n "$exclude_file"; then
			exclude=yes
			set -- dummy `${CAT} "$exclude_file"`; shift
		fi
		# With -c the operands are passed to pax as the members
		# to exclude.
		${TEST} "$exclude" = no || exclude_flag="-c"
		$decompress_cat "$distfile" |
			$paxprog ${EXTRACT_OPTS_PAX} $exclude_flag -O -r ${1+"$@"}
		;;
	*tar)
		: ${EXTRACT_OPTS_TAR=}
		case "$extract_using" in
		/*)	tarprog="$extract_using" ;;
		*)	tarprog="${TAR}" ;;
		esac
		tmpfile=
		# For -x, write the file operands one per line into a
		# temporary exclude file, which is removed again when the
		# script exits.
		# NOTE(review): the temporary name is predictable
		# (${TMPDIR}/<script name>.<pid>).
		if ${TEST} "$exclude" = "yes"; then
			tmpfile="${TMPDIR}/$self.$$"
			${RM} -f "$tmpfile"
			trap "\${RM} -f \"\$tmpfile\"" 0
			for i in ${1+"$@"}; do
				${ECHO} "$i" >> "$tmpfile"
			done
			exclude_file="$tmpfile"
		fi
		# With an exclude file in effect the file operands are
		# dropped, so tar extracts everything not excluded.
		if ${TEST} -n "$exclude_file"; then
			exclude_flag="-X $exclude_file"
			set -- dummy; shift
		fi
		$decompress_cat "$distfile" |
			$tarprog ${EXTRACT_OPTS_TAR} $exclude_flag -xf - ${1+"$@"}
		;;
	*)
		${ECHO} 1>&2 "$self: unknown tar program: $extract_using"
		exit 1
		;;
	esac
	;;

cpio)
	# cpio archives are always unpacked with ${PAX}; exclusion is
	# handled the same way as for tar archives unpacked with pax.
	: ${EXTRACT_OPTS_PAX=}
	if ${TEST} -n "$exclude_file"; then
		exclude=yes
		set -- dummy `${CAT} "$exclude_file"`; shift
	fi
	${TEST} "$exclude" = no || exclude_flag="-c"
	$decompress_cat "$distfile" |
		${PAX} ${EXTRACT_OPTS_PAX} $exclude_flag -r ${1+"$@"}
	;;

shar)
	# A shell archive is unpacked by feeding it to the shell.
	$decompress_cat "$distfile" | ${SH}
	;;

zip)
	: ${EXTRACT_OPTS_ZIP=-aqo}
	# As in the pax and cpio branches, an exclude file turns on
	# exclusion and replaces the file operands with its contents.
	# This must happen before exclude_flag is computed: otherwise the
	# patterns would be passed without "-x" and unzip would extract
	# exactly the members that were meant to be excluded.
	if ${TEST} -n "$exclude_file"; then
		exclude=yes
		set -- dummy `${CAT} "$exclude_file"`; shift
	fi
	${TEST} "$exclude" = "no" || exclude_flag="-x"
	${UNZIP_CMD} ${EXTRACT_OPTS_ZIP} "$distfile" $exclude_flag ${1+"$@"}
	;;

lha)
	# EXTRACT_OPTS_LHA is appended directly to the "x" command letter.
	: ${EXTRACT_OPTS_LHA=q}
	${LHA} x${EXTRACT_OPTS_LHA} "$distfile" ${1+"$@"}
	;;

compressed)
	# A single compressed file: decompress it into a file named after
	# the distfile with its last suffix removed.
	target="${distfile##*/}"; target="${target%.*}"
	$decompress_cat "$distfile" > "$target"
	;;

zoo)
	: ${EXTRACT_OPTS_ZOO=}
	${UNZOO} -x ${EXTRACT_OPTS_ZOO} "$distfile" ${1+"$@"}
	;;

rar)
	: ${EXTRACT_OPTS_RAR=-inul}
	${UNRAR} x ${EXTRACT_OPTS_RAR} "$distfile" ${1+"$@"}
	;;

rpm)
	: ${EXTRACT_OPTS_RPM=}
	${RPM2PKG} -d . ${EXTRACT_OPTS_RPM} "$distfile" ${1+"$@"}
	;;

jre-bin)
	# The distfile is itself a program: run it with "yes" on its
	# standard input and discard what it prints.
	: ${EXTRACT_OPTS_BIN=}
	${ECHO} yes | "$distfile" ${EXTRACT_OPTS_BIN} >/dev/null
	;;

gem)
	${GEM} "$distfile"
	;;

7z)
	${P7ZA} "$distfile"
	;;

zst|zstd)
	# The suffix table yields "zstd" for *.zst distfiles; "zst" is kept
	# so that an explicit "-f zst" continues to work.
	: ${EXTRACT_OPTS_ZSTD=}
	${ZSTD} -d ${EXTRACT_OPTS_ZSTD} "$distfile" ${1+"$@"}
	;;

none)
	# By default, copy the distfile over to the current working directory.
	${CP} "$distfile" .
	;;

*)
	${ECHO} 1>&2 "$self: archive format not recognized -- $format"
	exit 1
	;;
esac

exit 0