(ns singularity.amazon

  "A simple interface to Amazon's Mechanical Turk service.  Instead of using
  Amazon's own Java library, we rolled our own.  Amazon's library is some
  serious overkill, requiring several dozen Java libraries just to make a
  few HTTP requests and parse the XML results.  We do the same thing just
  using Enlive, coupled with a little help from clj-apache-http."

  (:use [clojure.walk :only (keywordize-keys)]
        [singularity.support :only (failure)])
  (:require fs
            config
            [clojure.java.io :as io]
            [clojure.data.json :as json]
            [clojure.tools.logging :as log]
            [clojure.string :as str]
            [com.twinql.clojure.http :as http]
            [net.cgrand.enlive-html :as enlive])
  (:import [brilliantarc.singularity AmazonSignature SingularityException]
           [java.io StringReader]))

(def ^:dynamic *service* nil)

(def settings (:amazon config/settings))
(def amazon-signature (AmazonSignature. (:secret settings)))

(def valid-assignment-status {:submitted "Submitted" :approved "Approved" :rejected "Rejected"})

(defn- as-float
  [value]
  (if (string? value)
    (try
      (Float/parseFloat value)
      (catch NumberFormatException e 0.0))
    (or value 0.0)))

(defn- as-int
  [value]
  (if (string? value)
    (try
      (Integer/parseInt value)
      (catch NumberFormatException e 0))
    (or value 0)))

(defn which-hits
  "Look for the hits file in either /etc/singularity, or in the local
  directory.  Returns a Buffered pointing to the correct hits file."
  []
  (if (fs/readable? "/etc/singularity/hits.json")
    (io/reader "/etc/singularity/hits.json")
    (io/reader "hits.json")))

(defn load-hits
  "Load the hits.json file."
  []
  (keywordize-keys (json/read-json (which-hits))))

(defn hit-type
  "Look up a HIT type from our hits.json configuration file.  Doesn't use the
  Amazon HIT types, since we don't really need them."
  [portfolio name]
  (let [hits (load-hits)
        portfolio-hits (get hits (keyword portfolio))]
    (if portfolio-hits
      (get portfolio-hits (keyword name)))))

(defn- timestamp
  "Return an Amazon-compatible ISO-8601 timestamp"
  []
  (.timestamp amazon-signature))

(defn- signature
  "Generate an Amazon-compatible signature for Mechanical Turk."
  [operation timestamp]
  (.turk amazon-signature operation timestamp))

(defn- amazon-parameters
  "Merge our parameters with the required parameters for Amazon.  Generates
  the timestamp and signature as well, so you only have to pass in the
  parameters for your operation specifically.

  Typically you don't call this directly, but instead call amazon-url.  It
  will call this function."
  [operation & [parameters]]
  (let [right-now (timestamp)
        parameters (or parameters {})]
    (merge parameters
      {:Service "AWSMechanicalTurkRequester"
       :Operation operation
       :AWSAccessKeyId (:access settings)
       :Version "2008-08-02"
       :Timestamp right-now
       :Signature (signature operation right-now)})))

(defn- amazon-url
  "Generate a URL to submit to Amazon.  Includes the required Amazon query
  parameters; pass in any additional parameters you would like to set."
  [operation & [parameters]]
  (let [encoded (http/encode-query (amazon-parameters operation parameters))]
    (log/debug "Sending to Amazon:" (str (:url settings) "?" encoded))
    (java.net.URL. (str (:url settings) "?" encoded))))

(defn select
  "Helper to parse XML via Enlive.  Pass in your XML node and path, like you
  were calling enlive/select, and this will extract the first text value it
  finds, or nil if the path doesn't exist."
  [[node] path]
  (enlive/text (first (enlive/select [node] path))))

(defn- extract-error-message [node]
  "Parse an error message out of an Amazon XML response."
  (select node [:Errors :Error :Message ]))

(defn- extract-account-balance [[node]]
  "Parse the details about the current account balance out of an Amazon XML
  response."
  (let [available-balance (enlive/select [node] [:GetAccountBalanceResult :AvailableBalance ])
        amount (as-float (select available-balance [:Amount ]))
        currency-code (select available-balance [:CurrencyCode ])
        formatted-price (select available-balance [:FormattedPrice ])]
    {:amount amount :currency currency-code :formatted formatted-price}))

(defn- extract-hit [[node]]
  "Parse a HIT's XML.  Returns a detailed HIT, but not all values may be
  complete depending on what Amazon returns."
  (let [hit (enlive/select [node] [:HIT ])
        [_ question] (re-find #"<ExternalURL>([^<]*)</ExternalURL>" (select hit [:Question ]))]
    {:id (select hit [:HITId ])
     :created (select hit [:CreationTime ])
     :title (select hit [:Title ])
     :description (select hit [:Description ])
     :question question
     :keywords (select hit [:Keywords ])
     :status (select hit [:HITStatus ])
     :assignments (as-int (select hit [:MaxAssignments ]))
     :reward (as-float (select hit [:Reward :Amount ]))
     :autoApproval (as-int (select hit [:AutoApprovalDelayInSeconds ]))
     :expiration (select hit [:Expiration ])
     :duration (as-int (select hit [:AssignmentDurationInSeconds ]))
     :similarHits (as-int (select hit [:NumberOfSimilarHITs ]))
     :annotation (select hit [:RequesterAnnotation ])
     :qualifications nil ;; TODO!!!
     :reviewStatus (select hit [:HITReviewStatus ])
     :assignmentsPending (as-int (select hit [:NumberOfAssignmentsPending ]))
     :assignmentsAvailable (as-int (select hit [:NumberOfAssignmentsAvailable ]))
     :assignmentsCompleted (as-int (select hit [:NumberOfAssignmentsCompleted ]))}))

(defn parse-answer [answer]
  (let [property (select [answer] [:QuestionIdentifier ])
        [_ property brackets] (re-matches #"([^\[]*)(\[\])" property)]
    (if brackets
      [property (str/split (select [answer] [:FreeText ]) #"\|")])))

(defn answers-to-tags
  [answers]
  (loop [results {}
         answers answers]
    (if (empty? answers)
      results
      (let [[[property tags] & rest] answers]
        (recur (assoc results property tags) rest)))))

(defn parse-answers [xml]
  (let [answers (enlive/select (enlive/xml-resource (StringReader. xml)) [:Answer ])
        tags (answers-to-tags (filter #(not (nil? %)) (map parse-answer answers)))
        state (some #(if (= (select [%] [:QuestionIdentifier ]) "state") (select [%] [:FreeText ])) answers)
        [_ space] (re-matches #"^space:(.*)$" state)
        [_ invalid] (re-matches #"^invalid:(.*)$" state)]
    {:tags tags :space space :invalid invalid}))

(defn- extract-assignment [[node]]
  (let [assignment (enlive/select [node] [:Assignment ])]
    {:id (select assignment [:AssignmentId ])
     :worker (select assignment [:WorkerId ])
     :hit (select assignment [:HITId ])
     :status (select assignment [:AssignmentStatus ])
     :autoAproval (select assignment [:AutoApprovalTime ])
     :accepted (select assignment [:AcceptTime ])
     :submitted (select assignment [:SubmitTime ])
     :approved (select assignment [:ApprovalTime ])
     :rejected (select assignment [:RejectionTime ])
     :deadline (select assignment [:Deadline ])
     ;     :answer-xml (select assignment[:Answer])}))
     :answer (parse-answers (select assignment [:Answer ]))}))

(defn- extract-assignments [node]
  (map #(extract-assignment [%]) (enlive/select node [:Assignment ])))

(defn extract-search-hits [node]
  (let [results (as-int (select node [:NumResults ]))
        page (as-int (select node [:PageNumber ]))
        total-results (as-int (select node [:TotalNumResults ]))
        hits (map #(extract-hit [%]) (enlive/select node [:HIT ]))]
    {:total-hits total-results :page page :hits hits}))

(defn successful?
  "Was the request to Amazon successful?  Checks for the IsValid == True state."
  [response]
  (= (select response [:Request :IsValid ]) "True"))

(defn- external-question
  "Convert the URL into an external question XML for creating a HIT."
  [url & [height]]
  (let [height (or height 1024)
        url (str/replace (str/replace url ">" "&gt;") "<" "&lt;")]
    (str "<ExternalQuestion xmlns=\"http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd\"><ExternalURL>"
      url "</ExternalURL><FrameHeight>" height "</FrameHeight></ExternalQuestion>")))

(defn qualification-requirement
  "Convert the qualification settings into the XML required to create the
  qualification (QualificationRequirement)."
  [qualification counter]
  (let [{type :type comparator :comparator value :value} qualification
        value
        (if (integer? value)
          {"IntegerValue" value}
          {"LocaleValue" value})]
    (into {} (map (fn [[k v]] [(str "QualificationRequirement." counter "." k) v])
               (merge {"QualificationTypeId" type "Comparator" comparator} value)))))

(defn qualification-requirements
  "Take the qualification requirements for a HIT and transform them into proper
  Amazon HTTP requests."
  ([parameters [qualification & rest] counter]
    (if qualification
      (qualification-requirements
        (merge parameters (qualification-requirement qualification counter)) rest (inc counter))
      parameters))
  ([parameters qualifications] (qualification-requirements parameters qualifications 1)))

(defn request
  "Generate a request to Amazon and handle the results.  We seem to do this a
  lot, so let's go DRY."
  [operation & [parameters error-message error-status]]
  (let [url (amazon-url operation parameters)
        response (enlive/xml-resource url)]
    (if (successful? response)
      response
      (let [error-message (or error-message "Amazon reported a problem: %s")
            error-status (as-int error-status)
            error-status (if (> error-status 0) error-status 500)]
        (failure error-status (format error-message (extract-error-message response)))))))


;; ...below this are all the Amazon calls...


(defn has-funds
  "Check if the Amazon account has funds (money) to support the request."
  []
  (let [account-balance (extract-account-balance (request "GetAccountBalance"))]
    (> (:amount account-balance) 0)))

(defn get-hit
  "Request the details about a HIT from Amazon, using its HIT ID.  Will raise
  a 404 exception if the HIT does not exist."
  [hit-id]
  (extract-hit (request "GetHIT" {:HITId hit-id} (str "Amazon failed to return a HIT for ID " hit-id ": %s") 404)))

(defn sample-hit
  [portfolio type url & [annotation]]
  (let [properties (hit-type portfolio type)]

    ;; Make sure the hit type exists
    (if-not properties (failure 500 "The HIT type " type " is not defined in hits.json."))
    (if-not (has-funds) (failure 500 "Amazon indicates insufficient funds to process HITs."))

    (let [question (external-question url (:height properties))
          {:keys [title description keywords reward duration autoApproval lifetime assignments qualifications]} properties]
      (amazon-url "CreateHIT" (qualification-requirements {:Title title
                                                           :Description description
                                                           :Question question
                                                           :Reward.1.Amount reward
                                                           :Reward.1.CurrencyCode "USD"
                                                           :AssignmentDurationInSeconds duration
                                                           :LifetimeInSeconds lifetime
                                                           :Keywords keywords
                                                           :MaxAssignments assignments
                                                           :AutoApprovalDelayInSeconds autoApproval
                                                           :RequesterAnnotation annotation} qualifications)
        "Failed to create a HIT with Amazon's servers: %s")
      nil)))

(defn create-hit
  "Create a new HIT with Amazon's Mechanical Turk using the settings described
  by the type in hits.json.  The URL is transformed into an external question,
  pointing to that URL.  Annotation is optional, and if present, will be
  associated with the HIT.

  Uses portfolio and type to look up the settings as defined in hits.json.

  If successful, returns the HIT ID recevied from Amazon.  If not, throws a
  SingularityException."
  [portfolio type url & [annotation]]
  (let [properties (hit-type portfolio type)]

    ;; Make sure the hit type exists
    (if-not properties (failure 500 "The HIT type " type " is not defined in hits.json."))
    (if-not (has-funds) (failure 500 "Amazon indicates insufficient funds to process HITs."))

    (let [question (external-question url (:height properties))
          {:keys [title description keywords reward duration autoApproval lifetime assignments qualifications]} properties]
      (assoc (extract-hit (request "CreateHIT" (qualification-requirements {:Title title
                                                                            :Description description
                                                                            :Question question
                                                                            :Reward.1.Amount reward
                                                                            :Reward.1.CurrencyCode "USD"
                                                                            :AssignmentDurationInSeconds duration
                                                                            :LifetimeInSeconds lifetime
                                                                            :Keywords keywords
                                                                            :MaxAssignments assignments
                                                                            :AutoApprovalDelayInSeconds autoApproval
                                                                            :RequesterAnnotation annotation} qualifications)
                            "Failed to create a HIT with Amazon's servers: %s")) :question url))))

(defn expire-hit
  "Expire a HIT early.  Returns true if successful."
  [hit-id]
  (request "ForceExpireHIT" {:HITId hit-id} "Failed to expire HIT " hit-id ": %s")
  true)

(defn delete-hit
  "Delete a HIT, i.e. dispose of it on Amazon's servers.  Returns true if
  successful."
  [hit-id]
  (request "DisposeHIT" {:HITId hit-id} (str "Failed to delete HIT " hit-id ": %s"))
  true)

(defn get-hits
  "Return all the hits, regardless of the state they're in.  Paginates, by
  default with 25 items per page.  Returns a map for the results, with the
  following keys:

    total-hits    - the total number of our hits in the system (all pages)
    page          - the page number of hits we've returned
    per-page      - how many hits we're returning per page
    hits          - the HITs for this page of results
  "
  [& [page per-page]]
  (let [page (if (> (as-int page) 0) page 1)
        per-page (if (> (as-int per-page) 0) per-page 25)]
    (assoc (extract-search-hits (request "SearchHITs" {:PageSize per-page :PageNumber page} "Failed to find any HITs: %s"))
      :per-page per-page)))

(defn get-reviewable-hits
  "Get the HITs ready to be reviewied, i.e. with the status of 'Reviewable'.
  Paginates, by default with 25 items per page.  Returns a map for the results,
  with the following keys:

    total-hits    - the total number of our hits in the system (all pages)
    page          - the page number of hits we've returned
    per-page      - how many hits we're returning per page
    hits          - the HITs for this page of results
  "
  [& [page per-page]]
  (let [page (if (> (as-int page) 0) page 1)
        per-page (if (> (as-int per-page) 0) per-page 25)]
    (assoc (extract-search-hits (request "GetReviewableHITs" {:PageSize per-page :PageNumber page} "Failed to find any HITs: %s"))
      :per-page per-page)))

(defn reviewing
  "Update the status of the given HIT to 'Reviewing'.  That way it won't come
  back with the get-reviewable-hits anymore."
  [hit-id]
  (if-not (successful? (request "SetHITAsReviewing" {:HITId hit-id}))
    (failure 500 "Amazon failed to update HIT " hit-id " to 'Reviewing'.")))

(defn reviewable
  "Update the status of the given HIT to 'Reviewable'.  This will only work if
  the HIT is in the 'Reviewing' state."
  [hit-id]
  (if-not (successful? (request "SetHITAsReviewing" {:HITId hit-id :Revert true}))
    (failure 500 "Amazon failed to update HIT " hit-id " to 'Reviewable'.")))

(defn get-assignments
  "Get the assignments for the given HIT.  Status may be :submitted, :approved,
  :rejected.  If status is not provided or is not one of the acceptable values,
  gets all the assignments."
  [hit-id & [status]]
  (let [status (get valid-assignment-status status)
        options {:HITId hit-id :PageSize 20}
        options (if status (assoc options :AssignmentStatus status) options)]
    (extract-assignments (request "GetAssignmentsForHIT" options
                           (str "Unable to retrieve assignments for HIT " hit-id ": %s") 404))))

(defn approve-assignment
  "Approve the given assignment.  An optional comment may be included to send
  to the worker."
  [assignment-id & [comment]]
  (let [options {:AssignmentId assignment-id}
        options (if comment (assoc options :RequesterFeedback comment) options)]
    (if-not (successful? (request "ApproveAssignment" options))
      (failure 500 "Amazon failed to approve assignment " assignment-id))))

(defn reject-assignment
  "Reject the given assignment.  An optional comment may be included to send to
  the worker.  This is strongly recommended during a rejection."
  [assignment-id & [comment]]
  (let [options {:AssignmentId assignment-id}
        options (if comment (assoc options :RequesterFeedback comment) options)]
    (if-not (successful? (request "RejectAssignment" options))
      (failure 500 "Amazon failed to reject assignment " assignment-id))))

(defn add-assignment
  "Add one or more additional assignments to a HIT.  Defaults to 1.  Also adds
  additional time to the HIT."
  [hit-id & [total]]
  ;; Update the HIT back to 'Reviewable'
  ;; We have to do this first, or Amazon will mess the state up later
  (let [hit (get-hit hit-id)]
    (if (= (:status hit) "Reviewing")
      (reviewable hit-id)))

  (let [total (or total 1)]
    (if-not (successful? (request "ExtendHIT" {:HITId hit-id
                                               :MaxAssignmentsIncrement total
                                               :ExpirationIncrementInSeconds 172800}))
      (failure 500 "Amazon failed to extend HIT " hit-id))))

(defn apply-bonus
  "Give the given worker a bonus for a job well done.  Bonus is in U.S. Dollars.
  A reason is also required, so the worker knows why he or she is getting a
  bonus."
  [worker-id assignment-id bonus reason]
  (if-not (successful? (request "GrantBonus" {:WorkerId worker-id
                                              :AssignmentId assignment-id
                                              :BonusAmount.1.Amount bonus
                                              :BonusAmount.1.CurrencyCode "USD"
                                              :Reason reason}))
    (failure 500 "Amazon failed to send the bonus of " bonus " to " worker-id)))

(defn notify-workers
  "Send a message to up to 100 workers."
  [workers-id subject message]
  (if-not (successful? (request "NotifyWorkers" {:WorkerId workers-id
                                                 :Subject subject
                                                 :MessageText message}))
    (failure 500 "Amazon failed to notify workers regarding " subject)))

(defn clear-hits
  "This is only used during development and testing.  You should NEVER call this
  in a production setting or from any RESTful resource.  Will wipe out any HITs
  it can, and it doesn't do a very good job of failing gracefully."
  []
  (loop []
    (let [hits (:hits (get-hits))]
      (when (not (empty? hits))
        (doseq [hit hits]
          (let [hit-id (:id hit)]
            (try
              (expire-hit hit-id)
              (catch SingularityException e
                (log/warn "Failed to expire HIT" hit-id ":" (.getMessage e))))
            (try
              (delete-hit hit-id)
              (catch SingularityException e
                (log/warn "Failed to delete HIT" hit-id ":" (.getMessage e))))))
        (recur)))))
