package pipelines

import java.util

import com.microsoft.azure.documentdb.hadoop.ConfigurationUtil
import org.apache.spark.SparkConf
import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}
import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Minutes, StreamingContext}

/**
  * Building blocks shared by the streaming pipelines: Spark/Kafka bootstrap, conversion of a
  * batch of messages into GraphX vertices and edges, and formatting of a scored graph into a
  * ranking document.
  *
  * All runtime settings are read from JVM system properties (`System.getProperty`).
  */
object GenericPipeline {

  /**
    * Creates the streaming context, the Spark session and the Kafka input stream.
    *
    * System properties read:
    *  - `MASTERNODE`: Spark master URL.
    *  - `TIMEWINDOWMINUTES`: batch interval in minutes (parsed with `toInt`).
    *  - `DBOUTHOST`, `DBOUTNAME`, `DBOUTCOLLECTIONNAME`, `DBOUTKEY`: copied into the Hadoop
    *    configuration under the corresponding DocumentDB `ConfigurationUtil` keys.
    *  - `KAFKATOPIC`: topic to consume, read with a single receiver thread.
    *  - `KAFKAURL`: connection string handed to `KafkaUtils.createStream`.
    *  - `OUTKAFKATOPIC`: appended to the object name to form the consumer group id.
    *
    * @return the streaming context, the session sharing its configuration, and the Kafka
    *         receiver stream of (key, message) pairs
    */
  def initialize(): (StreamingContext, SparkSession,
    ReceiverInputDStream[(String, String)]) = {

    // Scala objects compile to a class whose simple name ends in '$'; strip it.
    val pipelineName = getClass.getSimpleName.split("\\$").last

    val conf = new SparkConf().setAppName(pipelineName)
      .setMaster(System.getProperty("MASTERNODE"))
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

    val ssc = new StreamingContext(conf, Minutes(System.getProperty("TIMEWINDOWMINUTES").toInt))

    val session = SparkSession.builder.config(conf).getOrCreate

    // Output database settings, stored in the Hadoop configuration of the shared context.
    val hadoopConf = session.sparkContext.hadoopConfiguration
    hadoopConf.set(ConfigurationUtil.DB_HOST, System.getProperty("DBOUTHOST"))
    hadoopConf.set(ConfigurationUtil.DB_NAME, System.getProperty("DBOUTNAME"))
    hadoopConf.set(ConfigurationUtil.OUTPUT_COLLECTION_NAMES,
                   System.getProperty("DBOUTCOLLECTIONNAME"))
    hadoopConf.set(ConfigurationUtil.DB_KEY, System.getProperty("DBOUTKEY"))

    val topicMap = Map(System.getProperty("KAFKATOPIC") -> 1)
    val groupId = pipelineName + System.getProperty("OUTKAFKATOPIC")
    val kafkaStream = KafkaUtils.createStream(ssc, System.getProperty("KAFKAURL"), groupId,
                                              topicMap)

    (ssc, session, kafkaStream)
  }

  /**
    * Turns a batch of messages into GraphX vertices (accounts) and edges (messages).
    *
    * Every distinct account found in the `from` or `to` column receives exactly one numeric
    * id, which becomes its vertex id. Each message becomes an edge from the sender's id to the
    * receiver's id.
    *
    * @param msgs    messages with at least the columns `from`, `to`, `value` (string holding
    *                an integer), `timestamp` (long) and `block_number` (long)
    * @param session session used to build the account/id lookup table
    * @return the vertices as (id, account) pairs and the edges carrying
    *         (value, timestamp, block number)
    */
  def extractGraphComponents(msgs: Dataset[Row],
                             session: SparkSession): (RDD[(VertexId, String)],
    RDD[Edge[(BigInt, Long, Long)]]) = {

    // `union` keeps duplicates (UNION ALL), so the de-duplication has to happen after it.
    // Otherwise an account that both sends and receives would be assigned two ids, and the
    // joins below would emit every one of its messages once per id.
    val accounts = msgs.select("from").union(msgs.select("to")).distinct

    val acctsIds = session.createDataFrame(
      accounts.rdd.map(row => row.getString(0)).zipWithIndex
        .map { case (acct, id) => Row(acct, id) },
      new StructType(
        Array(StructField("acct", StringType, nullable = false),
              StructField("id", LongType, nullable = false))))

    // Resolve the sender and then the receiver account to their numeric ids.
    val modifiedMsgs = msgs.join(acctsIds, msgs.col("from") === acctsIds.col("acct"), "inner")
      .drop("acct").withColumnRenamed("id", "from_id")
      .join(acctsIds, msgs.col("to") === acctsIds.col("acct"), "inner").drop("acct")
      .withColumnRenamed("id", "to_id")

    val vertices: RDD[(VertexId, String)] = acctsIds.rdd.map { row =>
      (row.getAs[Long]("id"), row.getAs[String]("acct"))
    }

    val edges: RDD[Edge[(BigInt, Long, Long)]] = modifiedMsgs.rdd.map { msg =>
      Edge(msg.getAs[Long]("from_id"), msg.getAs[Long]("to_id"),
           (BigInt(msg.getAs[String]("value")), msg.getAs[Long]("timestamp"),
             msg.getAs[Long]("block_number")))
    }

    (vertices, edges)
  }

  /**
    * Builds the ranking document for a scored graph.
    *
    * Vertex attributes are read as (account, score). Two rankings are produced:
    *  - `rank_pos`: one entry per account with keys `account`, `score`, `index`;
    *  - `rank`: one entry per distinct score with keys `accounts` (all accounts sharing that
    *    score), `score`, `index`.
    *
    * `index` is the 1-based position in the sorted result. For `rankType == "value_sent"`
    * only scores above zero are kept and the highest score comes first. For any other rank
    * type scores must be above zero and below `Long.MaxValue`, and the lowest score comes
    * first.
    *
    * The maximum block number is queried from a table or view named `msgs`, which must be
    * registered in `session` before this method is called.
    *
    * @param graph      graph whose vertex attribute is (account, score)
    * @param session    session in which `msgs` is registered
    * @param windowSize window size stored in the document and used in its id
    * @param rankType   ranking type; `"value_sent"` selects the descending ranking
    * @return a map with the keys `rank_pos`, `rank`, `id`, `block_number`, `window_size`
    *         and `type`
    */
  def formatRanking(graph: Graph[(String, BigInt), (BigInt, Long, Long)], session:
  SparkSession, windowSize: Long, rankType: String): util.Map[String, Object] = {

    import session.implicits._

    val highestFirst = rankType == "value_sent"

    val scores: RDD[(String, BigInt)] =
      graph.vertices.map { case (_, (account, score)) => (account, score) }

    // NOTE(review): Long.MaxValue is presumably a "no score" sentinel for the ascending
    // rankings; confirm against the code that computes the scores.
    val eligible: RDD[(String, BigInt)] =
      if (highestFirst) scores.filter { case (_, score) => score > 0 }
      else scores.filter { case (_, score) => score < Long.MaxValue && score > 0 }

    val rawRanking: Array[util.HashMap[String, Object]] =
      eligible.sortBy(_._2, ascending = !highestFirst).collect.zipWithIndex.map {
        case ((account, score), position) => toRankEntry("account", account, score, position)
      }

    val tableRanking: Array[util.HashMap[String, Object]] =
      eligible.map { case (account, score) => (score, Array(account)) }
        .reduceByKey(_ ++ _)
        .sortBy(_._1, ascending = !highestFirst)
        .collect.zipWithIndex.map {
          case ((score, accounts), position) =>
            toRankEntry("accounts", accounts, score, position)
        }

    val maxBlockNumber = session.sql(
      "select max(block_number) from msgs")
      .map(row => row.getAs[Long](0))
      .collect()(0)

    val rankingMap: util.Map[String, Object] = new util.HashMap[String, Object]()
    rankingMap.put("rank_pos", rawRanking)
    rankingMap.put("rank", tableRanking)
    rankingMap.put("id", s"$maxBlockNumber$windowSize$rankType")
    rankingMap.put("block_number", Long.box(maxBlockNumber))
    rankingMap.put("window_size", Long.box(windowSize))
    rankingMap.put("type", rankType)

    rankingMap
  }

  /** Builds one ranking entry; `position` is zero-based and stored 1-based under `index`. */
  private def toRankEntry(subjectKey: String, subject: AnyRef, score: BigInt,
                          position: Int): util.HashMap[String, Object] = {
    val entry = new util.HashMap[String, Object]()
    entry.put(subjectKey, subject)
    entry.put("score", score)
    entry.put("index", Int.box(position + 1))
    entry
  }
}
