001/*
002 * Copyright (c) 2017-2018 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.local.tap.aws.s3;
022
023import java.io.FilterInputStream;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.OutputStream;
027import java.io.PipedInputStream;
028import java.io.PipedOutputStream;
029import java.net.URI;
030import java.net.URISyntaxException;
031import java.util.ArrayList;
032import java.util.Iterator;
033import java.util.List;
034import java.util.Properties;
035import java.util.function.Predicate;
036import java.util.regex.Pattern;
037
038import cascading.CascadingException;
039import cascading.flow.FlowProcess;
040import cascading.property.PropertyUtil;
041import cascading.scheme.FileFormat;
042import cascading.scheme.Scheme;
043import cascading.tap.SinkMode;
044import cascading.tap.Tap;
045import cascading.tap.TapException;
046import cascading.tap.local.PartitionTap;
047import cascading.tap.type.FileType;
048import cascading.tap.type.TapWith;
049import cascading.tuple.TupleEntryCollector;
050import cascading.tuple.TupleEntryIterator;
051import cascading.tuple.TupleEntrySchemeCollector;
052import cascading.tuple.TupleEntrySchemeIterator;
053import cascading.util.CloseableIterator;
054import cascading.util.Util;
055import com.amazonaws.ClientConfiguration;
056import com.amazonaws.client.builder.AwsClientBuilder;
057import com.amazonaws.services.s3.AmazonS3;
058import com.amazonaws.services.s3.AmazonS3ClientBuilder;
059import com.amazonaws.services.s3.model.AmazonS3Exception;
060import com.amazonaws.services.s3.model.ObjectMetadata;
061import com.amazonaws.services.s3.model.S3ObjectSummary;
062import com.amazonaws.services.s3.transfer.TransferManager;
063import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
064import com.amazonaws.services.s3.transfer.Upload;
065import com.amazonaws.services.s3.transfer.model.UploadResult;
066import org.slf4j.Logger;
067import org.slf4j.LoggerFactory;
068
069import static cascading.util.Util.isEmpty;
070
071/**
072 * Class S3Tap is a Cascading local-mode {@link Tap} providing read and write access to data stored in Amazon S3 buckets.
073 * <p>
074 * This Tap is not intended to be used with any of the other Cascading planners unless they specify they are local-mode
075 * compatible.
076 * <p>
077 * S3Tap can read a single key, all objects underneath a key-prefix, or all objects under a key-prefix that match
078 * a given globbing pattern.
079 * <p>
080 * See the various constructors for the available access parametrizations. Of note are the constructors that take
081 * a {@link URI} instance. The URI should be in the following format:
082 * {@code s3://[bucket]/<key|key-prefix><?glob>}
083 * <p>
084 * Where bucket is the only required value. The key references a single object, the key-prefix is used to access
 * a set of objects with a common prefix value. The glob value is used to further narrow the resulting object set.
086 * <p>
087 * The globbing pattern is specified by the {@link java.nio.file.FileSystem#getPathMatcher} method.
088 * <p>
089 * This Tap was designed to allow applications to effectively poll an S3 bucket for new keys to be processed.
090 * <p>
091 * When used with the {@link S3FileCheckpointer} class, a map of keys last consumed by each bucket will be tracked
092 * on disk, with the map surviving JVM restarts allowing for applications to exit and restart safely without
093 * retrieving duplicate data.
094 * <p>
095 * The {@link S3Checkpointer#commit()} method is only called during a graceful shutdown of the Flow or JVM, but every
096 * consumed key is passed to the S3Checkpointer, so custom implementations can choose to persist the key more
097 * frequently.
098 * <p>
099 * AWS Credentials are handled by {@link com.amazonaws.auth.DefaultAWSCredentialsProviderChain}.
100 */
101public class S3Tap extends Tap<Properties, InputStream, OutputStream> implements FileType<Properties>, TapWith<Properties, InputStream, OutputStream>
102  {
  /** Field LOG is the SLF4J logger for this class */
  private static final Logger LOG = LoggerFactory.getLogger( S3Tap.class );

  /** Field SEQUENCE_TOKEN is the literal placeholder text "{sequence}"; its consumers are not part of this section */
  public static final String SEQUENCE_TOKEN = "{sequence}";
  /** Field MIME_DIRECTORY is the "application/x-directory" content type; presumably marks directory-like objects, confirm against usage */
  public static final String MIME_DIRECTORY = "application/x-directory";
  /** Field DEFAULT_DELIMITER is the key delimiter applied when a constructor is not given one explicitly */
  public static final String DEFAULT_DELIMITER = "/";

  /** Field s3Client is the AmazonS3 client handed to the constructor, may be null */
  AmazonS3 s3Client;
  /** Field bucketName is the S3 bucket name, never null or empty after construction */
  String bucketName;
  /** Field key is the S3 object key or key-prefix, may be null */
  String key;
  /** Field filter is the optional Predicate applied to key strings, may be null */
  Predicate<String> filter;
  /** Field delimiter starts as DEFAULT_DELIMITER but is overwritten, possibly with null, by the bucket/key constructors */
  String delimiter = DEFAULT_DELIMITER;
  /** Field checkpointer is the optional S3Checkpointer handed to the constructor, may be null */
  S3Checkpointer checkpointer;

  // transient working state, not assigned by the constructors
  private transient ObjectMetadata objectMetadata;
  private transient TransferManager transferManager;
128
129  /**
130   * Method makeURI creates a new S3 URI from the given parameters.
131   *
132   * @param bucketName the S3 bucket name
133   * @param keyPrefix  the S3 object key or key-prefix
134   * @return an URI instance
135   */
136  public static URI makeURI( String bucketName, String keyPrefix )
137    {
138    return makeURI( bucketName, keyPrefix, null );
139    }
140
141  /**
142   * Method makeURI creates a new S3 URI from the given parameters.
143   *
144   * @param bucketName the S3 bucket name
145   * @param keyPrefix  the S3 object key or key-prefix
146   * @param glob       the globbing pattern to apply to the keys
147   * @return an URI instance
148   */
149  public static URI makeURI( String bucketName, String keyPrefix, String glob )
150    {
151    if( bucketName == null )
152      throw new IllegalArgumentException( "bucketName may not be null" );
153
154    try
155      {
156      if( keyPrefix == null )
157        keyPrefix = "/";
158      else if( !keyPrefix.startsWith( "/" ) )
159        keyPrefix = "/" + keyPrefix;
160
161      return new URI( "s3", bucketName, keyPrefix, glob, null );
162      }
163    catch( URISyntaxException exception )
164      {
165      throw new IllegalArgumentException( exception.getMessage(), exception );
166      }
167    }
168
169  /**
170   * Constructor S3Tap creates a new S3Tap instance.
171   *
172   * @param scheme     of Scheme
173   * @param bucketName of String
174   */
175  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName )
176    {
177    this( scheme, bucketName, null, null, null, SinkMode.KEEP );
178    }
179
180  /**
181   * Constructor S3Tap creates a new S3Tap instance.
182   *
183   * @param scheme     of Scheme
184   * @param bucketName of String
185   * @param key        of String
186   */
187  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key )
188    {
189    this( scheme, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP );
190    }
191
192  /**
193   * Constructor S3Tap creates a new S3Tap instance.
194   *
195   * @param scheme     of Scheme
196   * @param bucketName of String
197   * @param key        of String
198   * @param delimiter  of String
199   */
200  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter )
201    {
202    this( scheme, null, null, bucketName, key, delimiter, SinkMode.KEEP );
203    }
204
205  /**
206   * Constructor S3Tap creates a new S3Tap instance.
207   *
208   * @param scheme     of Scheme
209   * @param bucketName of String
210   * @param filter     of Predicate
211   */
212  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, Predicate<String> filter )
213    {
214    this( scheme, bucketName, null, filter, SinkMode.KEEP );
215    }
216
217  /**
218   * Constructor S3Tap creates a new S3Tap instance.
219   *
220   * @param scheme     of Scheme
221   * @param bucketName of String
222   * @param key        of String
223   * @param filter     of Predicate
224   */
225  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, Predicate<String> filter )
226    {
227    this( scheme, bucketName, key, DEFAULT_DELIMITER, filter, SinkMode.KEEP );
228    }
229
230  /**
231   * Constructor S3Tap creates a new S3Tap instance.
232   *
233   * @param scheme     of Scheme
234   * @param bucketName of String
235   * @param key        of String
236   * @param delimiter  of String
237   * @param filter     of Predicate
238   */
239  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter, Predicate<String> filter )
240    {
241    this( scheme, null, null, bucketName, key, delimiter, filter, SinkMode.KEEP );
242    }
243
244  /**
245   * Constructor S3Tap creates a new S3Tap instance.
246   *
247   * @param scheme     of Scheme
248   * @param s3Client   of AmazonS3
249   * @param bucketName of String
250   */
251  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName )
252    {
253    this( scheme, s3Client, bucketName, null, SinkMode.KEEP );
254    }
255
256  /**
257   * Constructor S3Tap creates a new S3Tap instance.
258   *
259   * @param scheme     of Scheme
260   * @param s3Client   of AmazonS3
261   * @param bucketName of String
262   * @param key        of String
263   */
264  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key )
265    {
266    this( scheme, s3Client, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP );
267    }
268
269  /**
270   * Constructor S3Tap creates a new S3Tap instance.
271   *
272   * @param scheme     of Scheme
273   * @param s3Client   of AmazonS3
274   * @param bucketName of String
275   * @param key        of String
276   * @param delimiter  of String
277   */
278  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter )
279    {
280    this( scheme, s3Client, bucketName, key, delimiter, null, SinkMode.KEEP );
281    }
282
283  /**
284   * Constructor S3Tap creates a new S3Tap instance.
285   *
286   * @param scheme     of Scheme
287   * @param s3Client   of AmazonS3
288   * @param bucketName of String
289   * @param key        of String
290   * @param delimiter  of String
291   * @param filter     of Predicate
292   */
293  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, Predicate<String> filter )
294    {
295    this( scheme, s3Client, null, bucketName, key, delimiter, filter, SinkMode.KEEP );
296    }
297
298  /**
299   * Constructor S3Tap creates a new S3Tap instance.
300   *
301   * @param scheme       of Scheme
302   * @param checkpointer of S3Checkpointer
303   * @param bucketName   of String
304   */
305  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName )
306    {
307    this( scheme, checkpointer, bucketName, null, null, null, SinkMode.KEEP );
308    }
309
310  /**
311   * Constructor S3Tap creates a new S3Tap instance.
312   *
313   * @param scheme       of Scheme
314   * @param checkpointer of S3Checkpointer
315   * @param bucketName   of String
316   * @param key          of String
317   */
318  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key )
319    {
320    this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP );
321    }
322
323  /**
324   * Constructor S3Tap creates a new S3Tap instance.
325   *
326   * @param scheme       of Scheme
327   * @param checkpointer of S3Checkpointer
328   * @param bucketName   of String
329   * @param key          of String
330   * @param delimiter    of String
331   */
332  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter )
333    {
334    this( scheme, null, checkpointer, bucketName, key, delimiter, SinkMode.KEEP );
335    }
336
337  /**
338   * Constructor S3Tap creates a new S3Tap instance.
339   *
340   * @param scheme       of Scheme
341   * @param checkpointer of S3Checkpointer
342   * @param bucketName   of String
343   * @param filter       of Predicate
344   */
345  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, Predicate<String> filter )
346    {
347    this( scheme, checkpointer, bucketName, null, filter, SinkMode.KEEP );
348    }
349
350  /**
351   * Constructor S3Tap creates a new S3Tap instance.
352   *
353   * @param scheme       of Scheme
354   * @param checkpointer of S3Checkpointer
355   * @param bucketName   of String
356   * @param key          of String
357   * @param filter       of Predicate
358   */
359  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, Predicate<String> filter )
360    {
361    this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, filter, SinkMode.KEEP );
362    }
363
364  /**
365   * Constructor S3Tap creates a new S3Tap instance.
366   *
367   * @param scheme       of Scheme
368   * @param checkpointer of S3Checkpointer
369   * @param bucketName   of String
370   * @param key          of String
371   * @param delimiter    of String
372   * @param filter       of Predicate
373   */
374  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter )
375    {
376    this( scheme, null, checkpointer, bucketName, key, delimiter, filter, SinkMode.KEEP );
377    }
378
379  /**
380   * Constructor S3Tap creates a new S3Tap instance.
381   *
382   * @param scheme       of Scheme
383   * @param s3Client     of AmazonS3
384   * @param checkpointer of S3Checkpointer
385   * @param bucketName   of String
386   */
387  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName )
388    {
389    this( scheme, s3Client, checkpointer, bucketName, null, SinkMode.KEEP );
390    }
391
392  /**
393   * Constructor S3Tap creates a new S3Tap instance.
394   *
395   * @param scheme       of Scheme
396   * @param s3Client     of AmazonS3
397   * @param checkpointer of S3Checkpointer
398   * @param bucketName   of String
399   * @param key          of String
400   */
401  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key )
402    {
403    this( scheme, s3Client, checkpointer, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP );
404    }
405
406  /**
407   * Constructor S3Tap creates a new S3Tap instance.
408   *
409   * @param scheme       of Scheme
410   * @param s3Client     of AmazonS3
411   * @param checkpointer of S3Checkpointer
412   * @param bucketName   of String
413   * @param key          of String
414   * @param delimiter    of String
415   */
416  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter )
417    {
418    this( scheme, s3Client, checkpointer, bucketName, key, delimiter, null, SinkMode.KEEP );
419    }
420
421  /**
422   * Constructor S3Tap creates a new S3Tap instance.
423   *
424   * @param scheme       of Scheme
425   * @param s3Client     of AmazonS3
426   * @param checkpointer of S3Checkpointer
427   * @param bucketName   of String
428   * @param key          of String
429   * @param delimiter    of String
430   * @param filter       of Predicate
431   */
432  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter )
433    {
434    this( scheme, s3Client, checkpointer, bucketName, key, delimiter, filter, SinkMode.KEEP );
435    }
436
437  /**
438   * Constructor S3Tap creates a new S3Tap instance.
439   *
440   * @param scheme     of Scheme
441   * @param bucketName of String
442   */
443  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, SinkMode sinkMode )
444    {
445    this( scheme, bucketName, null, null, null, sinkMode );
446    }
447
448  /**
449   * Constructor S3Tap creates a new S3Tap instance.
450   *
451   * @param scheme     of Scheme
452   * @param bucketName of String
453   * @param key        of String
454   */
455  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, SinkMode sinkMode )
456    {
457    this( scheme, bucketName, key, DEFAULT_DELIMITER, sinkMode );
458    }
459
460  /**
461   * Constructor S3Tap creates a new S3Tap instance.
462   *
463   * @param scheme     of Scheme
464   * @param bucketName of String
465   * @param key        of String
466   * @param delimiter  of String
467   */
468  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter, SinkMode sinkMode )
469    {
470    this( scheme, null, null, bucketName, key, delimiter, sinkMode );
471    }
472
473  /**
474   * Constructor S3Tap creates a new S3Tap instance.
475   *
476   * @param scheme     of Scheme
477   * @param bucketName of String
478   * @param filter     of Predicate
479   */
480  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, Predicate<String> filter, SinkMode sinkMode )
481    {
482    this( scheme, bucketName, null, filter, sinkMode );
483    }
484
485  /**
486   * Constructor S3Tap creates a new S3Tap instance.
487   *
488   * @param scheme     of Scheme
489   * @param bucketName of String
490   * @param key        of String
491   * @param filter     of Predicate
492   */
493  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, Predicate<String> filter, SinkMode sinkMode )
494    {
495    this( scheme, bucketName, key, DEFAULT_DELIMITER, filter, sinkMode );
496    }
497
498  /**
499   * Constructor S3Tap creates a new S3Tap instance.
500   *
501   * @param scheme     of Scheme
502   * @param bucketName of String
503   * @param key        of String
504   * @param delimiter  of String
505   * @param filter     of Predicate
506   * @param sinkMode   of SinkMode
507   */
508  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
509    {
510    this( scheme, null, null, bucketName, key, delimiter, filter, sinkMode );
511    }
512
513  /**
514   * Constructor S3Tap creates a new S3Tap instance.
515   *
516   * @param scheme     of Scheme
517   * @param s3Client   of AmazonS3
518   * @param bucketName of String
519   * @param sinkMode   of SinkMode
520   */
521  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, SinkMode sinkMode )
522    {
523    this( scheme, s3Client, bucketName, null, sinkMode );
524    }
525
526  /**
527   * Constructor S3Tap creates a new S3Tap instance.
528   *
529   * @param scheme     of Scheme
530   * @param s3Client   of AmazonS3
531   * @param bucketName of String
532   * @param key        of String
533   * @param sinkMode   of SinkMode
534   */
535  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, SinkMode sinkMode )
536    {
537    this( scheme, s3Client, bucketName, key, DEFAULT_DELIMITER, sinkMode );
538    }
539
540  /**
541   * Constructor S3Tap creates a new S3Tap instance.
542   *
543   * @param scheme     of Scheme
544   * @param s3Client   of AmazonS3
545   * @param bucketName of String
546   * @param key        of String
547   * @param delimiter  of String
548   * @param sinkMode   of SinkMode
549   */
550  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, SinkMode sinkMode )
551    {
552    this( scheme, s3Client, bucketName, key, delimiter, null, sinkMode );
553    }
554
555  /**
556   * Constructor S3Tap creates a new S3Tap instance.
557   *
558   * @param scheme     of Scheme
559   * @param s3Client   of AmazonS3
560   * @param bucketName of String
561   * @param key        of String
562   * @param delimiter  of String
563   * @param filter     of Predicate
564   * @param sinkMode   of SinkMode
565   */
566  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
567    {
568    this( scheme, s3Client, null, bucketName, key, delimiter, filter, sinkMode );
569    }
570
571  /**
572   * Constructor S3Tap creates a new S3Tap instance.
573   *
574   * @param scheme       of Scheme
575   * @param checkpointer of S3Checkpointer
576   * @param bucketName   of String
577   * @param sinkMode     of SinkMode
578   */
579  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, SinkMode sinkMode )
580    {
581    this( scheme, checkpointer, bucketName, null, null, null, sinkMode );
582    }
583
584  /**
585   * Constructor S3Tap creates a new S3Tap instance.
586   *
587   * @param scheme       of Scheme
588   * @param checkpointer of S3Checkpointer
589   * @param bucketName   of String
590   * @param key          of String
591   * @param sinkMode     of SinkMode
592   */
593  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, SinkMode sinkMode )
594    {
595    this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, sinkMode );
596    }
597
598  /**
599   * Constructor S3Tap creates a new S3Tap instance.
600   *
601   * @param scheme       of Scheme
602   * @param checkpointer of S3Checkpointer
603   * @param bucketName   of String
604   * @param key          of String
605   * @param delimiter    of String
606   * @param sinkMode     of SinkMode
607   */
608  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, SinkMode sinkMode )
609    {
610    this( scheme, null, checkpointer, bucketName, key, delimiter, sinkMode );
611    }
612
613  /**
614   * Constructor S3Tap creates a new S3Tap instance.
615   *
616   * @param scheme       of Scheme
617   * @param checkpointer of S3Checkpointer
618   * @param bucketName   of String
619   * @param filter       of Predicate
620   * @param sinkMode     of SinkMode
621   */
622  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, Predicate<String> filter, SinkMode sinkMode )
623    {
624    this( scheme, checkpointer, bucketName, null, filter, sinkMode );
625    }
626
627  /**
628   * Constructor S3Tap creates a new S3Tap instance.
629   *
630   * @param scheme       of Scheme
631   * @param checkpointer of S3Checkpointer
632   * @param bucketName   of String
633   * @param key          of String
634   * @param filter       of Predicate
635   * @param sinkMode     of SinkMode
636   */
637  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, Predicate<String> filter, SinkMode sinkMode )
638    {
639    this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, filter, sinkMode );
640    }
641
642  /**
643   * Constructor S3Tap creates a new S3Tap instance.
644   *
645   * @param scheme       of Scheme
646   * @param checkpointer of S3Checkpointer
647   * @param bucketName   of String
648   * @param key          of String
649   * @param delimiter    of String
650   * @param filter       of Predicate
651   * @param sinkMode     of SinkMode
652   */
653  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
654    {
655    this( scheme, null, checkpointer, bucketName, key, delimiter, filter, sinkMode );
656    }
657
658  /**
659   * Constructor S3Tap creates a new S3Tap instance.
660   *
661   * @param scheme       of Scheme
662   * @param s3Client     of AmazonS3
663   * @param checkpointer of S3Checkpointer
664   * @param bucketName   of String
665   * @param sinkMode     of SinkMode
666   */
667  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, SinkMode sinkMode )
668    {
669    this( scheme, s3Client, checkpointer, bucketName, null, sinkMode );
670    }
671
672  /**
673   * Constructor S3Tap creates a new S3Tap instance.
674   *
675   * @param scheme       of Scheme
676   * @param s3Client     of AmazonS3
677   * @param checkpointer of S3Checkpointer
678   * @param bucketName   of String
679   * @param key          of String
680   * @param sinkMode     of SinkMode
681   */
682  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, SinkMode sinkMode )
683    {
684    this( scheme, s3Client, checkpointer, bucketName, key, DEFAULT_DELIMITER, sinkMode );
685    }
686
687  /**
688   * Constructor S3Tap creates a new S3Tap instance.
689   *
690   * @param scheme       of Scheme
691   * @param s3Client     of AmazonS3
692   * @param checkpointer of S3Checkpointer
693   * @param bucketName   of String
694   * @param key          of String
695   * @param delimiter    of String
696   * @param sinkMode     of SinkMode
697   */
698  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, SinkMode sinkMode )
699    {
700    this( scheme, s3Client, checkpointer, bucketName, key, delimiter, null, sinkMode );
701    }
702
703  /**
704   * Constructor S3Tap creates a new S3Tap instance.
705   *
706   * @param scheme       of Scheme
707   * @param s3Client     of AmazonS3
708   * @param checkpointer of S3Checkpointer
709   * @param bucketName   of String
710   * @param key          of String
711   * @param delimiter    of String
712   * @param filter       of Predicate
713   * @param sinkMode     of SinkMode
714   */
715  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
716    {
717    super( scheme, sinkMode );
718    this.s3Client = s3Client;
719    this.checkpointer = checkpointer;
720    this.bucketName = bucketName;
721
722    if( isEmpty( this.bucketName ) )
723      throw new IllegalArgumentException( "bucket name may not be null or empty" );
724
725    this.key = key;
726    this.delimiter = delimiter;
727    this.filter = filter;
728    }
729
730  /**
731   * Constructor S3Tap creates a new S3Tap instance.
732   *
733   * @param scheme     of Scheme
734   * @param identifier of URI
735   */
736  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, URI identifier )
737    {
738    this( scheme, null, null, identifier, SinkMode.KEEP );
739    }
740
741  /**
742   * Constructor S3Tap creates a new S3Tap instance.
743   *
744   * @param scheme     of Scheme
745   * @param s3Client   of AmazonS3
746   * @param identifier of URI
747   */
748  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, URI identifier )
749    {
750    this( scheme, s3Client, null, identifier, SinkMode.KEEP );
751    }
752
753  /**
754   * Constructor S3Tap creates a new S3Tap instance.
755   *
756   * @param scheme       of Scheme
757   * @param checkpointer of S3Checkpointer
758   * @param identifier   of URI
759   */
760  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, URI identifier )
761    {
762    this( scheme, null, checkpointer, identifier, SinkMode.KEEP );
763    }
764
765  /**
766   * Constructor S3Tap creates a new S3Tap instance.
767   *
768   * @param scheme       of Scheme
769   * @param s3Client     of AmazonS3
770   * @param checkpointer of S3Checkpointer
771   * @param identifier   of URI
772   */
773  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, URI identifier )
774    {
775    this( scheme, s3Client, checkpointer, identifier, SinkMode.KEEP );
776    }
777
778  /**
779   * Constructor S3Tap creates a new S3Tap instance.
780   *
781   * @param scheme     of Scheme
782   * @param identifier of URI
783   * @param sinkMode   of SinkMode
784   */
785  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, URI identifier, SinkMode sinkMode )
786    {
787    this( scheme, null, null, identifier, sinkMode );
788    }
789
790  /**
791   * Constructor S3Tap creates a new S3Tap instance.
792   *
793   * @param scheme     of Scheme
794   * @param s3Client   of AmazonS3
795   * @param identifier of URI
796   * @param sinkMode   of SinkMode
797   */
798  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, URI identifier, SinkMode sinkMode )
799    {
800    this( scheme, s3Client, null, identifier, sinkMode );
801    }
802
803  /**
804   * Constructor S3Tap creates a new S3Tap instance.
805   *
806   * @param scheme       of Scheme
807   * @param checkpointer of S3Checkpointer
808   * @param identifier   of URI
809   * @param sinkMode     of SinkMode
810   */
811  public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, URI identifier, SinkMode sinkMode )
812    {
813    this( scheme, null, checkpointer, identifier, sinkMode );
814    }
815
/**
 * Constructor S3Tap creates a new S3Tap instance.
 * <p>
 * The bucket name is taken from the identifier authority and the key from the identifier path. When the
 * identifier has a query part, the query is compiled as a glob and used as the key filter.
 *
 * @param scheme       of Scheme
 * @param s3Client     of AmazonS3, may be null
 * @param checkpointer of S3Checkpointer, may be null
 * @param identifier   of URI, must use the s3 scheme and name a bucket
 * @param sinkMode     of SinkMode
 * @throws IllegalArgumentException if the identifier is null, does not use the s3 scheme, or has no bucket name
 */
public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, URI identifier, SinkMode sinkMode )
  {
  super( scheme, sinkMode );
  this.s3Client = s3Client;
  this.checkpointer = checkpointer;

  if( identifier == null )
    throw new IllegalArgumentException( "identifier may not be null" );

  // URI#getScheme returns null when the identifier has no scheme, compare from the literal to stay null safe
  if( !"s3".equalsIgnoreCase( identifier.getScheme() ) )
    throw new IllegalArgumentException( "identifier does not have s3 scheme" );

  this.bucketName = getBucketNameFor( identifier );

  if( isEmpty( this.bucketName ) )
    throw new IllegalArgumentException( "bucket name may not be null or empty: " + identifier );

  this.key = cleanKey( identifier );

  // the optional query part is treated as a glob to filter keys with
  if( identifier.getQuery() != null )
    filter = globPredicate( identifier.getQuery() );
  }
847
848  protected String getBucketNameFor( URI identifier )
849    {
850    String authority = identifier.getAuthority();
851
852    if( isEmpty( authority ) )
853      throw new IllegalArgumentException( "identifier must have an authority: " + identifier );
854
855    int pos = authority.indexOf( '@' );
856
857    if( pos != -1 )
858      return authority.substring( pos + 1 );
859
860    return authority;
861    }
862
863  private static Predicate<String> globPredicate( String glob )
864    {
865    String regex = getRegexForGlob( glob );
866    Pattern pattern = Pattern.compile( regex );
867
868    return string -> pattern.matcher( string ).matches();
869    }
870
871  private static String getRegexForGlob( String glob )
872    {
873    return (String) Util.invokeStaticMethod(
874      "sun.nio.fs.Globs",
875      "toUnixRegexPattern",
876      new Object[]{glob},
877      new Class[]{String.class}
878    );
879    }
880
881  @Override
882  public TapWith<Properties, InputStream, OutputStream> withScheme( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme )
883    {
884    // don't lazily create s3Client
885    return create( scheme, s3Client, getBucketName(), getKey(), getDelimiter(), getFilter(), getSinkMode() );
886    }
887
888  @Override
889  public TapWith<Properties, InputStream, OutputStream> withChildIdentifier( String identifier )
890    {
891    URI uri;
892
893    if( identifier.startsWith( "s3://" ) )
894      uri = URI.create( identifier );
895    else if( identifier.startsWith( getBucketName() ) )
896      uri = makeURI( identifier, null );
897    else
898      uri = makeURI( getBucketName(), getKey() + ( identifier.startsWith( delimiter ) ? identifier : delimiter + identifier ) );
899
900    // don't lazily create s3Client
901    return create( getScheme(), s3Client, uri, getSinkMode() );
902    }
903
904  protected TapWith<Properties, InputStream, OutputStream> create( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, URI identifier, SinkMode sinkMode )
905    {
906    try
907      {
908      return Util.newInstance( getClass(), scheme, s3Client, identifier, sinkMode );
909      }
910    catch( CascadingException exception )
911      {
912      throw new TapException( "unable to create a new instance of: " + getClass().getName(), exception );
913      }
914    }
915
916  @Override
917  public TapWith<Properties, InputStream, OutputStream> withSinkMode( SinkMode sinkMode )
918    {
919    // don't lazily create s3Client
920    return create( getScheme(), s3Client, getBucketName(), getKey(), getDelimiter(), getFilter(), sinkMode );
921    }
922
923  protected TapWith<Properties, InputStream, OutputStream> create( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode )
924    {
925    try
926      {
927      return Util.newInstance( getClass(), scheme, s3Client, bucketName, key, delimiter, filter, sinkMode );
928      }
929    catch( CascadingException exception )
930      {
931      throw new TapException( "unable to create a new instance of: " + getClass().getName(), exception );
932      }
933    }
934
935  protected String cleanKey( URI identifier )
936    {
937    String path = identifier.normalize().getPath();
938
939    if( path.startsWith( "/" ) )
940      path = path.substring( 1 );
941
942    return path;
943    }
944
945  protected AmazonS3 getS3Client( Properties properties )
946    {
947    // return provided client
948    if( s3Client != null )
949      return s3Client;
950
951    AmazonS3ClientBuilder standard = AmazonS3ClientBuilder.standard();
952
953    if( properties != null )
954      {
955      String endpoint = properties.getProperty( S3TapProps.S3_ENDPOINT );
956      String region = properties.getProperty( S3TapProps.S3_REGION, "us-east-1" );
957
958      if( properties.containsKey( S3TapProps.S3_PROXY_HOST ) )
959        {
960        ClientConfiguration config = new ClientConfiguration()
961          .withProxyHost( properties.getProperty( S3TapProps.S3_PROXY_HOST ) )
962          .withProxyPort( PropertyUtil.getIntProperty( properties, S3TapProps.S3_PROXY_PORT, -1 ) );
963
964        standard.withClientConfiguration( config );
965        }
966
967      if( endpoint != null )
968        standard.withEndpointConfiguration( new AwsClientBuilder.EndpointConfiguration( endpoint, region ) );
969      else
970        standard.setRegion( region );
971
972      if( Boolean.parseBoolean( properties.getProperty( S3TapProps.S3_PATH_STYLE_ACCESS, "false" ) ) )
973        standard.enablePathStyleAccess();
974      }
975
976    return standard.build();
977    }
978
979  /**
980   * Method getCheckpointer returns the checkpointer of this S3Tap object.
981   *
982   * @return the checkpointer (type S3Checkpointer) of this S3Tap object.
983   */
984  public S3Checkpointer getCheckpointer()
985    {
986    return checkpointer;
987    }
988
989  /**
990   * Method getBucketName returns the bucketName of this S3Tap object.
991   *
992   * @return the bucketName (type String) of this S3Tap object.
993   */
994  public String getBucketName()
995    {
996    return bucketName;
997    }
998
999  /**
1000   * Method getKey returns the key of this S3Tap object.
1001   *
1002   * @return the key (type String) of this S3Tap object.
1003   */
1004  public String getKey()
1005    {
1006    return key;
1007    }
1008
1009  protected String getMarker()
1010    {
1011    if( checkpointer != null )
1012      return checkpointer.getLastKey( getBucketName() );
1013
1014    return null;
1015    }
1016
1017  protected void setLastMarker( String marker )
1018    {
1019    if( checkpointer != null )
1020      checkpointer.setLastKey( getBucketName(), marker );
1021    }
1022
1023  protected void commitMarker()
1024    {
1025    if( checkpointer != null )
1026      checkpointer.commit();
1027    }
1028
1029  /**
1030   * Method getFilter returns the filter of this S3Tap object.
1031   *
1032   * @return the filter (type Predicate) of this S3Tap object.
1033   */
1034  public Predicate<String> getFilter()
1035    {
1036    return filter;
1037    }
1038
1039  /**
1040   * Method getDelimiter returns the delimiter of this S3Tap object.
1041   *
1042   * @return the delimiter (type String) of this S3Tap object.
1043   */
1044  public String getDelimiter()
1045    {
1046    return delimiter;
1047    }
1048
1049  @Override
1050  public String getIdentifier()
1051    {
1052    return makeStringIdentifier( getBucketName(), getKey() );
1053    }
1054
1055  @Override
1056  public String getFullIdentifier( Properties conf )
1057    {
1058    return getIdentifier();
1059    }
1060
1061  @Override
1062  public boolean deleteResource( Properties conf ) throws IOException
1063    {
1064    AmazonS3 s3Client = getS3Client( conf );
1065
1066    try
1067      {
1068      s3Client.deleteObject( getBucketName(), getKey() );
1069      }
1070    catch( AmazonS3Exception exception )
1071      {
1072      throw handleException( s3Client, exception );
1073      }
1074
1075    return true;
1076    }
1077
1078  @Override
1079  public boolean createResource( Properties conf ) throws IOException
1080    {
1081    AmazonS3 s3Client = getS3Client( conf );
1082
1083    try
1084      {
1085      s3Client.putObject( getBucketName(), getKey(), "" );
1086      }
1087    catch( AmazonS3Exception exception )
1088      {
1089      throw handleException( s3Client, exception );
1090      }
1091
1092    return true;
1093    }
1094
1095  protected ObjectMetadata getObjectMetadata( Properties conf )
1096    {
1097    try
1098      {
1099      if( objectMetadata == null )
1100        objectMetadata = getS3Client( conf ).getObjectMetadata( getBucketName(), getKey() );
1101
1102      return objectMetadata;
1103      }
1104    catch( AmazonS3Exception exception )
1105      {
1106      throw handleException( getS3Client( conf ), exception );
1107      }
1108    }
1109
1110  private class CheckedFilterInputStream extends FilterInputStream
1111    {
1112    public CheckedFilterInputStream( InputStream inputStream )
1113      {
1114      super( inputStream );
1115      }
1116    }
1117
1118  @Override
1119  public TupleEntryIterator openForRead( FlowProcess<? extends Properties> flowProcess, InputStream input ) throws IOException
1120    {
1121    AmazonS3 s3Client = getS3Client( flowProcess.getConfig() );
1122
1123    final String[] identifier = new String[ 1 ];
1124
1125    CloseableIterator<InputStream> iterator = new CloseableIterator<InputStream>()
1126      {
1127      S3Iterable iterable = S3Iterable.iterable( s3Client, getBucketName(), getKey() )
1128        .withFilter( getFilter() )
1129        .withMarker( getMarker() );
1130
1131      Iterator<S3ObjectSummary> iterator = iterable.iterator();
1132      InputStream lastInputStream;
1133
1134      @Override
1135      public boolean hasNext()
1136        {
1137        return iterator.hasNext();
1138        }
1139
1140      @Override
1141      public InputStream next()
1142        {
1143        safeClose();
1144
1145        S3ObjectSummary objectSummary = iterator.next();
1146
1147        identifier[ 0 ] = makeStringIdentifier( objectSummary.getBucketName(), objectSummary.getKey() );
1148
1149        flowProcess.getFlowProcessContext().setSourcePath( identifier[ 0 ] );
1150
1151        if( LOG.isDebugEnabled() )
1152          LOG.debug( "s3 retrieving: {}/{}, with size: {}", objectSummary.getBucketName(), objectSummary.getKey(), objectSummary.getSize() );
1153
1154        // getObject does not seem to fill the InputStream, nor does the InputStream support marking
1155        // may make sense to wrap this iterator in a iterate ahead iterator that attempts to pre-fetch objects in a different thread
1156        lastInputStream = new CheckedFilterInputStream( s3Client.getObject( objectSummary.getBucketName(), objectSummary.getKey() ).getObjectContent() )
1157          {
1158          @Override
1159          public void close() throws IOException
1160            {
1161            setLastMarker( objectSummary.getKey() );
1162            super.close();
1163            }
1164          };
1165
1166        return lastInputStream;
1167        }
1168
1169      private void safeClose()
1170        {
1171        try
1172          {
1173          if( lastInputStream != null )
1174            lastInputStream.close();
1175
1176          lastInputStream = null;
1177          }
1178        catch( IOException exception )
1179          {
1180          // do nothing
1181          }
1182        }
1183
1184      @Override
1185      public void close()
1186        {
1187        safeClose();
1188        commitMarker();
1189        }
1190      };
1191
1192    return new TupleEntrySchemeIterator<Properties, InputStream>( flowProcess, this, getScheme(), iterator, () -> identifier[ 0 ] );
1193    }
1194
1195  @Override
1196  public TupleEntryCollector openForWrite( FlowProcess<? extends Properties> flowProcess, OutputStream outputStream ) throws IOException
1197    {
1198    AmazonS3 s3Client = getS3Client( flowProcess.getConfig() );
1199
1200    if( !s3Client.doesBucketExistV2( getBucketName() ) )
1201      s3Client.createBucket( getBucketName() );
1202
1203    PipedInputStream pipedInputStream = new PipedInputStream();
1204    PipedOutputStream pipedOutputStream = new PipedOutputStream( pipedInputStream );
1205
1206    TransferManager transferManager = getTransferManager( s3Client );
1207
1208    ObjectMetadata metadata = new ObjectMetadata();
1209
1210    final String key = resolveKey( flowProcess, getKey() );
1211
1212    LOG.info( "starting async upload: {}", makeStringIdentifier( getBucketName(), key ) );
1213
1214    Upload upload = transferManager.upload( getBucketName(), key, pipedInputStream, metadata );
1215
1216    return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, this, getScheme(), pipedOutputStream, makeStringIdentifier( getBucketName(), key ) )
1217      {
1218      @Override
1219      public void close()
1220        {
1221        super.close(); // flushes and closes output
1222
1223        try
1224          {
1225          UploadResult uploadResult = upload.waitForUploadResult();
1226
1227          if( uploadResult != null )
1228            {
1229            if( LOG.isDebugEnabled() )
1230              LOG.debug( "completed upload: {}, with key: {}", getIdentifier(), uploadResult.getKey() );
1231            }
1232          }
1233        catch( InterruptedException exception )
1234          {
1235          // ignore
1236          }
1237        finally
1238          {
1239          transferManager.shutdownNow( false );
1240          }
1241        }
1242      };
1243    }
1244
1245  protected TransferManager getTransferManager( AmazonS3 s3Client )
1246    {
1247    if( transferManager == null )
1248      transferManager = TransferManagerBuilder.standard().withS3Client( s3Client ).build();
1249
1250    return transferManager;
1251    }
1252
1253  protected String resolveKey( FlowProcess<? extends Properties> flowProcess, String key )
1254    {
1255    int partNum = flowProcess.getIntegerProperty( PartitionTap.PART_NUM_PROPERTY, 0 );
1256
1257    key = key.replace( SEQUENCE_TOKEN, String.format( "%05d", partNum ) );
1258
1259    if( getScheme() instanceof FileFormat )
1260      return key + "." + ( (FileFormat) getScheme() ).getExtension();
1261
1262    return key;
1263    }
1264
1265  @Override
1266  public boolean resourceExists( Properties conf ) throws IOException
1267    {
1268    AmazonS3 s3Client = getS3Client( conf );
1269
1270    try
1271      {
1272      if( getKey() == null )
1273        return s3Client.doesBucketExistV2( getBucketName() );
1274
1275      return s3Client.doesObjectExist( getBucketName(), getKey() );
1276      }
1277    catch( AmazonS3Exception exception )
1278      {
1279      throw handleException( s3Client, exception );
1280      }
1281    }
1282
1283  protected AmazonS3Exception handleException( AmazonS3 s3Client, AmazonS3Exception exception )
1284    {
1285    if( exception.getStatusCode() == 400 )
1286      {
1287      LOG.error( "s3 request failed, try changing the AWS Region from: {}, using property: {}", s3Client.getRegionName(), S3TapProps.S3_REGION, exception );
1288      }
1289
1290    return exception;
1291    }
1292
1293  @Override
1294  public long getModifiedTime( Properties conf ) throws IOException
1295    {
1296    return getObjectMetadata( conf ).getLastModified().getTime();
1297    }
1298
1299  @Override
1300  public boolean isDirectory( FlowProcess<? extends Properties> flowProcess ) throws IOException
1301    {
1302    return MIME_DIRECTORY.equalsIgnoreCase( getObjectMetadata( flowProcess.getConfig() ).getContentType() );
1303    }
1304
1305  @Override
1306  public boolean isDirectory( Properties conf ) throws IOException
1307    {
1308    return isDirectory( FlowProcess.nullFlowProcess() );
1309    }
1310
1311  @Override
1312  public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess ) throws IOException
1313    {
1314    return getChildIdentifiers( flowProcess.getConfig() );
1315    }
1316
1317  @Override
1318  public String[] getChildIdentifiers( Properties conf ) throws IOException
1319    {
1320    return getChildIdentifiers( conf, 1, false );
1321    }
1322
1323  @Override
1324  public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess, int depth, boolean fullyQualified ) throws IOException
1325    {
1326    return getChildIdentifiers( flowProcess.getConfig(), depth, fullyQualified );
1327    }
1328
1329  @Override
1330  public String[] getChildIdentifiers( Properties conf, int depth, boolean fullyQualified ) throws IOException
1331    {
1332    if( !resourceExists( conf ) )
1333      return new String[ 0 ];
1334
1335    S3Iterable objects = S3Iterable.iterable( getS3Client( conf ), getBucketName(), getKey() )
1336      .withDelimiter( getDelimiter() )
1337      .withMaxDepth( depth )
1338      .withFilter( getFilter() )
1339      .withMarker( getMarker() );
1340
1341    Iterator<S3ObjectSummary> iterator = objects.iterator();
1342
1343    List<String> results = new ArrayList<>();
1344
1345    while( iterator.hasNext() )
1346      results.add( makePath( iterator, fullyQualified ) );
1347
1348    return results.toArray( new String[ results.size() ] );
1349    }
1350
1351  protected String makePath( Iterator<S3ObjectSummary> iterator, boolean fullyQualified )
1352    {
1353    String key = iterator.next().getKey();
1354
1355    if( fullyQualified )
1356      return makeStringIdentifier( getBucketName(), key );
1357
1358    return key.substring( getKey().length() );
1359    }
1360
1361  @Override
1362  public long getSize( FlowProcess<? extends Properties> flowProcess ) throws IOException
1363    {
1364    return getSize( flowProcess.getConfig() );
1365    }
1366
1367  @Override
1368  public long getSize( Properties conf ) throws IOException
1369    {
1370    if( isDirectory( conf ) )
1371      return 0;
1372
1373    return getObjectMetadata( conf ).getInstanceLength();
1374    }
1375
1376  protected static String makeStringIdentifier( String bucketName, String keyPrefix )
1377    {
1378    if( isEmpty( keyPrefix ) )
1379      return String.format( "s3://%s/", bucketName );
1380
1381    return String.format( "s3://%s/%s", bucketName, keyPrefix );
1382    }
1383  }