001/*
002 * Copyright (c) 2018 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.local.tap.splunk;
022
023import java.io.InputStream;
024import java.io.OutputStream;
025import java.io.PrintWriter;
026import java.util.Properties;
027import java.util.TimeZone;
028
029import cascading.flow.FlowProcess;
030import cascading.scheme.SinkCall;
031import cascading.scheme.local.TextDelimited;
032import cascading.tap.Tap;
033import cascading.tuple.Fields;
034import cascading.tuple.type.DateType;
035import com.splunk.JobExportArgs;
036
037/**
038 * Class SplunkCSV is a {@link cascading.scheme.Scheme} that enables CSV export from a Splunk instance.
039 * <p>
040 * Any given declared source {@link Fields} will be passed to the underlying export request. By default only the
041 * default fields will be retrieved, but will be declared as {@link Fields#UNKNOWN} in the final plan.
042 * <p>
043 * This Scheme may only be used to source data from Splunk.
044 * <p>
 * Default fields returned by the web service (presence and order are not guaranteed):
046 * <pre>{@code
047 * "_serial"
048 * "_time" ->  yyyy-MM-dd HH:mm:ss.SSS z
049 * "source"
050 * "sourcetype"
051 * "host"
052 * "index"
053 * "splunk_server"
054 * "_raw"
055 * }</pre>
 */
058public class SplunkCSV extends TextDelimited implements SplunkScheme
059  {
060  public static final DateType DATE_TYPE = new DateType( "yyyy-MM-dd HH:mm:ss.SSS z", TimeZone.getTimeZone( "UTC" ) );
061  public static final Fields _TIME = new Fields( "_time", DATE_TYPE );
062  public static final Fields _SERIAL = new Fields( "_serial", Long.class ); // optional
063  public static final Fields SOURCE = new Fields( "source", String.class );
064  public static final Fields SOURCETYPE = new Fields( "sourcetype", String.class );
065  public static final Fields HOST = new Fields( "host", String.class );
066  public static final Fields INDEX = new Fields( "index", String.class );
067  public static final Fields SPLUNK_SERVER = new Fields( "splunk_server", String.class );
068  public static final Fields _RAW = new Fields( "_raw", String.class );
069  public static final Fields _INDEXTIME = new Fields( "_indextime", long.class );
070  public static final Fields _SUBSECOND = new Fields( "_subsecond", float.class );
071  public static final Fields TIMESTARTPOS = new Fields( "timestartpos", long.class ); // start pos of time in _raw field
072  public static final Fields TIMEENDPOS = new Fields( "timeendpos", long.class ); // end pos of time in _raw field
073
074  /**
075   * Typical default fields emitted from an export if no fields specified.
076   */
077  public static final Fields DEFAULTS = Fields.NONE
078    .append( _SERIAL )
079    .append( _TIME )
080    .append( SOURCE )
081    .append( SOURCETYPE )
082    .append( HOST )
083    .append( INDEX )
084    .append( SPLUNK_SERVER )
085    .append( _RAW );
086
087  /**
088   * All known internal Splunk fields.
089   */
090  public static final Fields KNOWN = DEFAULTS
091    .append( _INDEXTIME )
092    .append( _SUBSECOND )
093    .append( TIMESTARTPOS )
094    .append( TIMEENDPOS );
095
096  /**
097   * Instantiates a new SplunkCSV instance.
098   */
099  public SplunkCSV()
100    {
101    super( Fields.ALL, true, false, ",", "\"", null );
102    }
103
104  /**
105   * Instantiates a new SplunkCSV instance that returns the given fields.
106   *
107   * @param fields the fields
108   */
109  public SplunkCSV( Fields fields )
110    {
111    super( fields, true, false, ",", "\"", null );
112    }
113
114  @Override
115  public boolean isSink()
116    {
117    return false;
118    }
119
120  @Override
121  public void sink( FlowProcess<? extends Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall )
122    {
123    throw new UnsupportedOperationException( "sinking is not supported" );
124    }
125
126  @Override
127  public void sourceConfInit( FlowProcess<? extends Properties> flowProcess, Tap<Properties, InputStream, OutputStream> tap, Properties conf )
128    {
129    super.sourceConfInit( flowProcess, tap, conf );
130
131    JobExportArgs args = new JobExportArgs();
132
133    args.setOutputMode( JobExportArgs.OutputMode.CSV );
134
135    if( getSourceFields().isDefined() )
136      {
137      Fields sourceFields = getSourceFields();
138      String[] fields = new String[ sourceFields.size() ];
139
140      for( int i = 0; i < sourceFields.size(); i++ )
141        fields[ i ] = sourceFields.get( i ).toString();
142
143      args.setFieldList( fields );
144      }
145
146    conf.put( "args", args );
147    }
148
149  @Override
150  public Fields retrieveSourceFields( FlowProcess<? extends Properties> process, Tap tap )
151    {
152    return getSourceFields();
153    }
154  }