#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc)

#######################################
# Follow a chain of symbolic links and print the path it ends at.
# A relative link target is interpreted relative to the directory that
# holds the link, so the result may still contain ".." components.
# Arguments: $1 - path that may be a symbolic link
# Outputs:   the resolved path on stdout
#######################################
resolve_symlinks() {
  local path="$1" target
  while [[ -h "$path" ]]; do
    # readlink reports the target verbatim; scraping `ls -ld` output
    # mis-parses targets that themselves contain " -> ".
    target=$(readlink -- "$path") || break
    if [[ "$target" == /* ]]; then
      path="$target"
    else
      path="$(dirname -- "$path")/$target"
    fi
  done
  printf '%s\n' "$path"
}

# Path of this script with any symlinks to it resolved
THIS_SCRIPT=$(resolve_symlinks "$0")

# Installation root: the parent of the directory holding this script.
# CDPATH is cleared so cd cannot echo a directory into the captured output,
# and a failing cd aborts instead of silently yielding the current directory.
SOLR_TIP=$(CDPATH='' cd -- "$(dirname -- "$THIS_SCRIPT")/.." && pwd) || {
  echo >&2 "Unable to determine the Solr installation directory from $THIS_SCRIPT"
  exit 1
}

# Pick the java launcher: SOLR_JAVA_HOME wins, then JAVA_HOME, otherwise
# whatever "java" resolves to on PATH.
# JAVA is reset first so that a JAVA variable inherited from the environment
# cannot be used by accident when JAVA_HOME holds no executable launcher.
JAVA=""
if [[ -n "$SOLR_JAVA_HOME" ]]; then
  JAVA="$SOLR_JAVA_HOME/bin/java"
elif [[ -n "$JAVA_HOME" ]]; then
  # Try the bin/amd64 location first, then the plain bin directory.
  for candidate in "$JAVA_HOME/bin/amd64/java" "$JAVA_HOME/bin/java"; do
    if [[ -x "$candidate" ]]; then
      JAVA="$candidate"
      break
    fi
  done
else
  JAVA=java
fi

# test that Java exists and is executable on this server
if ! "$JAVA" -version >/dev/null 2>&1; then
  echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."
  exit 1
fi


# ===== post specific code

# Jars under the installation's dist directory matching solr-core-*.jar.
# Only the first element is used as the classpath when the tool is launched.
TOOL_JAR=("${SOLR_TIP}"/dist/solr-core-*.jar)

#######################################
# Print the command-line help text for this script.
# The -optimize option is intentionally left out of the text, but can be
# used as '-optimize yes' (default: no).
# Globals:   THIS_SCRIPT (read) - shown as the command name in the examples
# Arguments: none
# Outputs:   the usage text on stdout
#######################################
function print_usage() {
  # One argument per output line. Inner double quotes, the literal $'...'
  # and the \n in the examples are escaped so they are printed verbatim.
  printf '%s\n' \
    '' \
    'Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>' \
    '    or post -help' \
    '' \
    '   collection name defaults to DEFAULT_SOLR_COLLECTION if not specified' \
    '' \
    'OPTIONS' \
    '=======' \
    '  Solr options:' \
    '    -url <base Solr update URL> (overrides collection, host, and port)' \
    '    -host <host> (default: localhost)' \
    '    -port <port> (default: 8983)' \
    '    -commit yes|no (default: yes)' \
    '' \
    '  Web crawl options:' \
    '    -recursive <depth> (default: 1)' \
    '    -delay <seconds> (default: 10)' \
    '' \
    '  Directory crawl options:' \
    '    -delay <seconds> (default: 0)' \
    '' \
    '  stdin/args options:' \
    '    -type <content/type> (default: application/xml)' \
    '' \
    '  Other options:' \
    '    -filetypes <type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)' \
    '    -params "<key>=<value>[&<key>=<value>...]" (values must be URL-encoded; these pass through to Solr update request)' \
    '    -out yes|no (default: no; yes outputs Solr response to console)' \
    '' \
    '' \
    'Examples:' \
    '' \
    "* JSON file: $THIS_SCRIPT -c wizbang events.json" \
    "* XML files: $THIS_SCRIPT -c records article*.xml" \
    "* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv" \
    "* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents" \
    "* Web crawl: $THIS_SCRIPT -c gettingstarted http://lucene.apache.org/solr -recursive 1 -delay 1" \
    "* Standard input (stdin): echo '{\"commit\": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d" \
    "* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d \$'id,value\\n1,0.47'" \
    ''
} # end print_usage

# A lone -help, -h or -usage argument prints the usage text and stops.
if (( $# == 1 )); then
  case "$1" in
    -help | -h | -usage)
      print_usage
      exit
      ;;
  esac
fi


# State filled in from the command line.
COLLECTION="$DEFAULT_SOLR_COLLECTION"
PROPS=('-Dauto=yes')
RECURSIVE=""
# Reset explicitly so a MODE variable inherited from the environment cannot
# select a data mode on its own.
MODE=""
FILES=()
URLS=()
ARGS=()

#######################################
# Classify each command-line word.
#   existing directory     -> FILES, and RECURSIVE=yes
#   existing regular file  -> FILES
#   word starting "http"   -> URLS
#   -c <name>              -> COLLECTION
#   -d | --data | -        -> MODE=stdin when stdin has data; otherwise
#                             MODE=args and every remaining word is stored
#                             in ARGS unparsed
#   -<key> <value>         -> PROPS gets -D<key>=<value>
#                             (e.g. "-delay 5" becomes -Ddelay=5)
# Anything else is rejected. An option given as the very last word, with no
# value after it, is rejected as well instead of silently becoming an empty
# value.
# TODO: natively handle the optional parameters to SPT
# Globals:   COLLECTION, PROPS, RECURSIVE, MODE, FILES, URLS, ARGS (written)
#            THIS_SCRIPT, PWD (read, for messages)
# Arguments: the script's command-line arguments
# Returns:   0 on success; exits the script with status 1 on a bad argument
#######################################
parse_args() {
  local opt
  while (( $# > 0 )); do
    if [[ -d "$1" ]]; then
      # Directory
      RECURSIVE=yes
      FILES+=("$1")
    elif [[ -f "$1" ]]; then
      # File
      FILES+=("$1")
    elif [[ "$1" == http* ]]; then
      # URL
      URLS+=("$1")
    elif [[ "$1" != -* ]]; then
      echo -e "\nUnrecognized argument: $1\n"
      echo -e "If this was intended to be a data file, it does not exist relative to $PWD\n"
      exit 1
    elif [[ "$1" == "-d" || "$1" == "--data" || "$1" == "-" ]]; then
      if [[ -s /dev/stdin ]]; then
        MODE="stdin"
      else
        # when no stdin exists and -d specified, the rest of the arguments
        # are assumed to be strings to post as-is
        MODE="args"
        shift
        ARGS=("$@")
        return 0
      fi
    else
      opt="$1"
      if (( $# < 2 )); then
        echo -e "\nOption $opt requires a value.\n"
        echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
        exit 1
      fi
      shift
      if [[ "$opt" == "-c" ]]; then
        # Special case, pull out collection name
        COLLECTION="$1"
      else
        # Drop the leading dash and forward as a Java system property.
        PROPS+=("-D${opt:1}=$1")
      fi
    fi
    shift
  done
}

parse_args "$@"

# Validate what was collected from the command line.

# A target collection is mandatory.
if [[ -z "$COLLECTION" ]]; then
  echo -e "\nCollection must be specified.  Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# Count the distinct kinds of input that were supplied: files/directories,
# URLs, and stdin-or-argument-strings each count once.
input_kinds=0
if [[ ${#FILES[@]} -ne 0 ]]; then
  input_kinds=$((input_kinds + 1))
fi
if [[ ${#URLS[@]} -ne 0 ]]; then
  input_kinds=$((input_kinds + 1))
fi
if [[ "$MODE" == "stdin" || "$MODE" == "args" ]]; then
  input_kinds=$((input_kinds + 1))
fi

# Unsupported: bin/post -c foo
if [[ $input_kinds -eq 0 ]]; then
  echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# SPT does not support mixing different data mode types, just files, just URLs, just stdin, or just argument strings.
# The following are unsupported constructs:
#    bin/post -c foo existing_file.csv http://example.com
#    echo '<xml.../>' | bin/post -c foo existing_file.csv
#    bin/post -c foo existing_file.csv -d 'anything'
if [[ $input_kinds -gt 1 ]]; then
  echo -e "\nCombining files/directories, URLs, stdin, or args is not supported.  Post them separately.\n"
  exit 1
fi

# Work out the data mode and the trailing arguments for SimplePostTool,
# then print and run the java command line.
PARAMS=""

# TODO: let's simplify this
if [[ "$MODE" == "stdin" || "$MODE" == "args" ]]; then
  if (( ${#ARGS[@]} == 0 )); then
    # SPT needs a valid (to post to Solr) args string, useful for 'bin/post -c foo -d' to force a commit
    ARGS+=("<add/>")
  fi
  PARAMS=("${ARGS[@]}")
else
  # The first element of each list decides whether that list is in use;
  # URLs are checked last and therefore take precedence over files.
  if [[ -n "${FILES[0]}" ]]; then
    MODE="files"
    PARAMS=("${FILES[@]}")
  fi

  if [[ -n "${URLS[0]}" ]]; then
    MODE="web"
    PARAMS=("${URLS[@]}")
  fi
fi

# Collection and data mode are always passed; recursion only when a
# directory was given.
PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
if [[ "$RECURSIVE" != "" ]]; then
  PROPS+=('-Drecursive=yes')
fi

# Assemble the command once so the echoed line and the executed command
# cannot drift apart.
launch_cmd=("$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}")
echo "${launch_cmd[@]}"
"${launch_cmd[@]}"

# Manual smoke tests for bin/post:
# bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]'
# bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>'