
## Extract one variable from the model output and aggregate it into one file per member.
## Output files: <variable>_<member number>.nc

## ---- settings ----
## dir:        case directory holding the per-member sub-directories
## var:        variable to extract (a trailing number is split off below)
## ifilelist:  input file pattern relative to a member directory
## extractdir: scratch directory for the per-file extractions
dir=/nird/datalake/NS9039K/users/pgchiu/sst_nudge/norcpm2a_tro10Atl5m_ng2l1dA_w2
var=sst
ifilelist='ocn/hist/*.blom.hm.{1980..2020}-??.nc'
extractdir=extract

## If var ends with digits, treat them as a level: lev receives the digits and
## pvar the name without them. Otherwise lev is empty and pvar equals var.
if [[ $var =~ [0-9]+$ ]]; then
    lev=${BASH_REMATCH[0]}
    pvar=${var%"$lev"}
else
    lev=''
    pvar=$var
fi

## Report whether a variable with exactly the given name is declared in a
## netCDF file.
##   $1  variable name
##   $2  path of the netCDF file
## Prints 'true' or 'false' (no trailing newline) on stdout; always returns 0.
## A file that ncdump cannot read is reported as 'false'.
function is_var_in_file () {
    local varname=$1
    local fn=$2
    local exist=false
    ## Only the header (ncdump -h) is read. Declarations are the lines inside a
    ## "variables:" section of the form "<type> <name>(<dims>) ;" or
    ## "<type> <name> ;". The name is compared as a literal string, so a name
    ## that is merely a substring of another variable, of an attribute or of
    ## the dataset name does not count.
    if [ -n "$varname" ] && ncdump -h "$fn" | awk -v name="$varname" '
            $1 == "variables:" { invars = 1; next }
            $1 == "dimensions:" || $1 == "types:" || $1 == "data:" ||
                $1 == "group:" || $1 == "//" { invars = 0; next }
            # attribute lines have "=" as their second field
            invars && NF >= 2 && $2 != "=" {
                decl = $2
                sub(/[(;].*$/, "", decl)
                if (decl == name) found = 1
            }
            END { exit(found ? 0 : 1) }
        '; then
        exist=true
    fi
    printf '%s' "$exist"
    return 0
}
## Print the declared type of a dimensioned variable in a netCDF file.
##   $1  variable name
##   $2  path of the netCDF file
## Prints the type word found in front of "<name>(" in the header (e.g. short,
## float, double) without a trailing newline. Prints nothing when no such
## declaration is found or when ncdump fails. Always returns 0.
function type_var_in_file () {
    local varname=$1
    local fn=$2
    local vtype
    ## Only the header (ncdump -h) is read. The name is matched as a literal
    ## prefix of the second field and only the first declaration is used, so
    ## the result is always a single word. A failed ncdump yields an empty type.
    vtype=$(ncdump -h "$fn" | awk -v name="$varname" '
        !seen && NF >= 2 && index($2, name "(") == 1 { print $1; seen = 1 }
    ') || true
    printf '%s' "$vtype"
    return 0
}
## Extract $pvar at the single level $lev from $ifile and write it to $ofn
## under the name $var.
## Globals read: lev, pvar, var, ifile, ofn.
## Files: vrt_prs_${lev}.nc in the current directory is created once and
## reused; ${ofn}_$$_tmp.nc is a temporary file that is always removed.
## Returns 0 on success, otherwise the status of the first failing NCO command.
function read_1level_from_hybrid_sigma () {
    local vrt_fl="vrt_prs_${lev}.nc"
    local tmpfn="${ofn}_$$_tmp.nc"
    local rc=0

    ## make lev file: one "plev" value, ${lev} with "00" appended
    if [ ! -f "$vrt_fl" ]; then
        ncap2 -O -v -s 'defdim("plev",1);plev[$plev]={'"${lev}"'00};' "$vrt_fl" || return
    fi

    ## Remap to that level, average away the plev dimension, then rename the
    ## variable to $var. Each step runs only if the previous one succeeded.
    ncremap --vrt_fl="$vrt_fl" -v "$pvar" "$ifile" "$tmpfn" \
        && ncwa -v "$pvar" -a plev "$tmpfn" "$ofn" \
        && ncrename -v "${pvar},${var}" "$ofn" \
        || rc=$?

    rm -f -- "$tmpfn"
    return "$rc"
}

## Main driver: for every member directory ${dir}/*mem??, extract $var from
## each monthly history file into $extractdir, then concatenate the extracted
## files along the record dimension into ${var}_<mem>.nc in the current
## directory. Members whose output file already exists are skipped. Any failed
## step aborts the script with status 1 and leaves $extractdir in place, so a
## rerun can reuse the files extracted so far.
mkdir -p "$extractdir"

for d in "${dir}"/*mem??; do
    ## an unmatched glob is left as the literal pattern; skip it
    [ -e "$d" ] || continue
    mem=${d##*mem}
    #o1=${var}_$(printf "%2.2d" $mem).nc
    o1=${var}_${mem}.nc
    if [ -f "$o1" ]; then
        continue
    fi
    echo "making ${o1}..."

    ## Input files of this member. This is the same pattern as $ifilelist; it
    ## is spelled out here because a pattern stored in a variable is not
    ## brace-expanded. Patterns without a match are dropped.
    ifiles=()
    for ifile in "$d"/ocn/hist/*.blom.hm.{1980..2020}-??.nc; do
        if [ -e "$ifile" ]; then
            ifiles+=("$ifile")
        fi
    done
    if [ "${#ifiles[@]}" -eq 0 ]; then
        echo "no input files found in $d, skipping" >&2
        continue
    fi

    iifiles=()
    ## unpacking, can be parallelized but maybe later
    ## (ifile and ofn are also read by read_1level_from_hybrid_sigma)
    for ifile in "${ifiles[@]}"; do
        ofn="${extractdir}/${var}_${ifile##*/}"
        iifiles+=("$ofn")
        ## already extracted by an earlier run
        if [ -f "$ofn" ]; then
            continue
        fi

        ok=true
        ## an empty type means var is not declared (with dimensions) in the file
        vartype=$(type_var_in_file "$var" "$ifile") || true
        case "$vartype" in
            short)
                ## -U: unpack while extracting
                ncpdq -O -U -v"${var}" "$ifile" -o "$ofn" || ok=false
                ;;
            float|double)
                ncks -O -v"${var}" "$ifile" -o "$ofn" || ok=false
                ;;
            *)
                ## if not, maybe var is tailed with plev, slow, need be parallel(?)
                if [ -n "$lev" ] && [ "$(is_var_in_file "$pvar" "$ifile")" = 'true' ]; then
                    read_1level_from_hybrid_sigma || ok=false
                elif [ "$var" = 'PRECT' ]; then ## for derived var
                    ## PRECT = PRECC + PRECL
                    ncap2 -v -s 'PRECT=PRECL+PRECC' "$ifile" "$ofn" || ok=false
                else
                    echo "$var is not in $ifile" >&2
                    exit 1
                fi
                ;;
        esac
        if [ "$ok" != true ]; then
            echo "extracting $var from $ifile failed" >&2
            exit 1
        fi
    done

    ## NorESM (BLOM/MICOM)
    ## concatenate the extracted files, then delete the coordinates and
    ## valid_range attributes of the variable
    if ! ncrcat -O -h -v "${var}" "${iifiles[@]}" "${o1}_tmp1.nc"; then
        echo "ncrcat failed for ${o1}" >&2
        exit 1
    fi
    if ! ncatted \
            -a coordinates,"${var}",d,, \
            -a valid_range,"${var}",d,, \
            "${o1}_tmp1.nc" "${o1}"; then
        echo "ncatted failed for ${o1}" >&2
        exit 1
    fi
    rm -- "${o1}_tmp1.nc"
done
## ${extractdir:?} aborts instead of expanding to an empty path
rm -rf -- "${extractdir:?}"

