001/* 002 * Copyright (c) 2018 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.local.tap.splunk; 022 023import java.io.InputStream; 024import java.io.OutputStream; 025import java.io.PrintWriter; 026import java.util.Properties; 027import java.util.TimeZone; 028 029import cascading.flow.FlowProcess; 030import cascading.scheme.SinkCall; 031import cascading.scheme.local.TextDelimited; 032import cascading.tap.Tap; 033import cascading.tuple.Fields; 034import cascading.tuple.type.DateType; 035import com.splunk.JobExportArgs; 036 037/** 038 * Class SplunkCSV is a {@link cascading.scheme.Scheme} that enables CSV export from a Splunk instance. 039 * <p> 040 * Any given declared source {@link Fields} will be passed to the underlying export request. By default only the 041 * default fields will be retrieved, but will be declared as {@link Fields#UNKNOWN} in the final plan. 042 * <p> 043 * This Scheme may only be used to source data from Splunk. 044 * <p> 045 * Default fields from web service, but not guaranteed 046 * <pre>{@code 047 * "_serial" 048 * "_time" -> yyyy-MM-dd HH:mm:ss.SSS z 049 * "source" 050 * "sourcetype" 051 * "host" 052 * "index" 053 * "splunk_server" 054 * "_raw" 055 * }</pre> 056 * <p> 057 */ 058public class SplunkCSV extends TextDelimited implements SplunkScheme 059 { 060 public static final DateType DATE_TYPE = new DateType( "yyyy-MM-dd HH:mm:ss.SSS z", TimeZone.getTimeZone( "UTC" ) ); 061 public static final Fields _TIME = new Fields( "_time", DATE_TYPE ); 062 public static final Fields _SERIAL = new Fields( "_serial", Long.class ); // optional 063 public static final Fields SOURCE = new Fields( "source", String.class ); 064 public static final Fields SOURCETYPE = new Fields( "sourcetype", String.class ); 065 public static final Fields HOST = new Fields( "host", String.class ); 066 public static final Fields INDEX = new Fields( "index", String.class ); 067 public static final Fields SPLUNK_SERVER = new Fields( "splunk_server", String.class ); 068 public static final Fields _RAW = new Fields( "_raw", String.class ); 069 public static final Fields _INDEXTIME = new Fields( "_indextime", long.class ); 070 public static final Fields _SUBSECOND = new Fields( "_subsecond", float.class ); 071 public static final Fields TIMESTARTPOS = new Fields( "timestartpos", long.class ); // start pos of time in _raw field 072 public static final Fields TIMEENDPOS = new Fields( "timeendpos", long.class ); // end pos of time in _raw field 073 074 /** 075 * Typical default fields emitted from an export if no fields specified. 076 */ 077 public static final Fields DEFAULTS = Fields.NONE 078 .append( _SERIAL ) 079 .append( _TIME ) 080 .append( SOURCE ) 081 .append( SOURCETYPE ) 082 .append( HOST ) 083 .append( INDEX ) 084 .append( SPLUNK_SERVER ) 085 .append( _RAW ); 086 087 /** 088 * All known internal Splunk fields. 089 */ 090 public static final Fields KNOWN = DEFAULTS 091 .append( _INDEXTIME ) 092 .append( _SUBSECOND ) 093 .append( TIMESTARTPOS ) 094 .append( TIMEENDPOS ); 095 096 /** 097 * Instantiates a new SplunkCSV instance. 098 */ 099 public SplunkCSV() 100 { 101 super( Fields.ALL, true, false, ",", "\"", null ); 102 } 103 104 /** 105 * Instantiates a new SplunkCSV instance that returns the given fields. 106 * 107 * @param fields the fields 108 */ 109 public SplunkCSV( Fields fields ) 110 { 111 super( fields, true, false, ",", "\"", null ); 112 } 113 114 @Override 115 public boolean isSink() 116 { 117 return false; 118 } 119 120 @Override 121 public void sink( FlowProcess<? extends Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall ) 122 { 123 throw new UnsupportedOperationException( "sinking is not supported" ); 124 } 125 126 @Override 127 public void sourceConfInit( FlowProcess<? extends Properties> flowProcess, Tap<Properties, InputStream, OutputStream> tap, Properties conf ) 128 { 129 super.sourceConfInit( flowProcess, tap, conf ); 130 131 JobExportArgs args = new JobExportArgs(); 132 133 args.setOutputMode( JobExportArgs.OutputMode.CSV ); 134 135 if( getSourceFields().isDefined() ) 136 { 137 Fields sourceFields = getSourceFields(); 138 String[] fields = new String[ sourceFields.size() ]; 139 140 for( int i = 0; i < sourceFields.size(); i++ ) 141 fields[ i ] = sourceFields.get( i ).toString(); 142 143 args.setFieldList( fields ); 144 } 145 146 conf.put( "args", args ); 147 } 148 149 @Override 150 public Fields retrieveSourceFields( FlowProcess<? extends Properties> process, Tap tap ) 151 { 152 return getSourceFields(); 153 } 154 }