#!/bin/bash

############################################################################
#
# Version: $Revision$
#
# Date: $Date$
#
# Copyright (c) 2002-2009, The DSpace Foundation.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# - Neither the name of the DSpace Foundation nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
#
# Author        : Cody Green
#                 Digital Initiatives Research
#                 Texas A&M University
# Email         : codygreen@tamu.edu
# Description   : Shell script that helps migrate data from a development
#                 server to a production server.  It will remove the handle
#                 and any entries in the dublin_core.xml file that will be
#                 duplicated upon importing the data into DSpace.
#
############################################################################

# Bail out early unless bash is the interpreter. bash sets the BASH variable
# for itself, so an empty value means some other shell is running this file.
# Plain `test` is used on purpose: this line must still be understood by
# whichever shell is actually executing the script.
test -n "${BASH}" || {
        printf "ERROR: please run this script with the BASH shell.\n"
        exit 192
}

# Locate the sed binary and export its path as SED for the rest of the
# script.
#
# The sed found on the PATH is preferred. If none is found, fall back to the
# conventional location for the running OS (SunOS or Linux). The script stops
# with exit status 1 when no executable sed can be determined, including on
# an OS for which no fallback location is known.
SED=$(command -v sed)
if [[ -z "$SED" ]]; then
        case "$(uname -s)" in
                SunOS) SED="/usr/bin/sed" ;;
                Linux) SED="/bin/sed" ;;
        esac

        # Sanity check: the fallback must exist and be executable. SED is
        # still empty here when the OS matched neither case above.
        if [[ -z "$SED" || ! -x "$SED" ]]; then
                printf "ERROR: Can not find the program sed.\n"
                printf "Please check that the program is installed or in your path.\n"
                exit 1
        fi
fi
export SED

# Print the usage summary. It is shown both when no argument is given and
# when help is requested explicitly.
show_usage() {
        printf "Usage: dspace_migrate.sh [DIRECTORY]\n"
        printf "Example: /dspace/bin/dspace_migrate.sh /home/user/ETDs/ \n\n"
        printf "This script will remove the handle and any \nentries in the dublin_core.xml file that will \nbe duplicated upon importing the data into DSpace.\n\n"
}

# Show usage and stop when no directory was passed or the user asked for
# help. "$1" is quoted so that a directory name containing spaces (or words
# that look like test operators) is compared as one string instead of
# breaking the test expression.
if [[ $# -eq 0 || "$1" == "--help" || "$1" == "--h" ]]; then
        show_usage
        exit 0
fi

# Stop unless the argument names an existing directory. The argument is
# quoted so that an empty string or a path containing spaces is rejected or
# accepted on its merits instead of silently slipping past a malformed test.
# It is also passed to printf as data, never as the format string.
if [[ ! -d "$1" ]]; then
        printf 'ERROR: %s is not a valid directory.\n' "$1"
        exit 1
fi

printf "Fixing Bad Data in ETDs\n"

# Clean the dublin_core.xml of every item directory found directly under the
# directory given as $1. For each file the following lines are removed:
#   - elements with an empty value
#   - date.accessioned and date.available
#   - date.issued, but only when it equals date.accessioned
#   - identifier.uri values starting with http://hdl
#   - description.provenance elements (single-line or spanning several lines)
#   - format.extent and format.mimetype
#   - any line containing "bytes, checksum"
# The script exits with status 1 on the first file that cannot be processed.
# In that case the untouched copy <file>.orig is left behind for recovery.
for i in "$1"/*/dublin_core.xml; do
   printf 'Checking %s...\n' "$i"

   # When the glob matches nothing, bash leaves the literal pattern in $i, so
   # this test also reports a directory that contains no items at all.
   if [[ ! -s "$i" ]]; then
      printf 'ERROR: %s does not exist or is empty\n' "$i"
      exit 1
   fi

   # Keep an untouched copy. It is the input for sed below and the recovery
   # point if anything goes wrong.
   if ! cp -- "$i" "$i.orig"; then
      printf 'ERROR: Could not copy %s to %s \n' "$i" "$i.orig"
      exit 1
   fi

   printf '   --removing null and duplicate values for %s\n' "$i"

   # Pull the text between '>' and '<' of the issued / accessioned elements.
   issued=$(grep 'element="date" qualifier="issued"' < "$i.orig" \
      | cut -f2 -d'>' | cut -f1 -d'<')
   accessioned=$(grep 'element="date" qualifier="accessioned"' < "$i.orig" \
      | cut -f2 -d'>' | cut -f1 -d'<')

   filters=(
      -e '/><\/dcvalue>/d'
      -e '/element="date" qualifier="accessioned"/d'
      -e '/element="date" qualifier="available"/d'
   )

   # NOTE(review): an issued date identical to the accession date is treated
   # as a duplicate and dropped; presumably it was generated by the source
   # server rather than supplied with the item - confirm.
   if [[ "$issued" == "$accessioned" ]]; then
      filters=("${filters[@]}" -e '/element="date" qualifier="issued"/d')
   else
      printf ' -- date issued %s  \n' "$issued"
   fi

   # A sed range only starts looking for its end on the line AFTER the one
   # that opened it, so a provenance element that opens and closes on a
   # single line must be deleted by its own rule first. Otherwise the range
   # would also swallow the next element in the file.
   filters=(
      "${filters[@]}"
      -e '/element="identifier" qualifier="uri">http:\/\/hdl/d'
      -e '/element="description" qualifier="provenance".*<\/dcvalue>/d'
      -e '/element="description" qualifier="provenance"/,/<\/dcvalue>/d'
      -e '/element="format" qualifier="extent"/d'
      -e '/element="format" qualifier="mimetype"/d'
      -e '/bytes, checksum/d'
   )

   # Read from the backup copy, never from the file being written: the output
   # redirection truncates $i before sed gets a chance to read it.
   if ! "$SED" "${filters[@]}" < "$i.orig" > "$i"; then
      printf 'ERROR: Could not fix %s\n' "$i"
      exit 1
   fi

   # Check to see if filesize is 0
   printf '   --checking filesize for %s\n' "$i"
   if [[ ! -s "$i" ]]; then
      printf 'ERROR: filesize for %s is 0\n' "$i"
      exit 1
   fi

   # The rewrite succeeded, so the backup copy is no longer needed.
   if ! rm -f -- "$i.orig"; then
      printf 'ERROR: Could not remove %s\n' "$i.orig"
      exit 1
   fi

done
# Delete the handle file of every item directory directly under $1. The
# directory argument is quoted so that a path containing spaces still expands
# to the intended files, and "--" keeps a path that starts with a dash from
# being read as an option.
if ! rm -f -- "$1"/*/handle; then
   printf "ERROR: Could not delete handles\n"
   exit 1
fi
printf "Data Has Been Fixed\n"
