001/* 002 * Copyright (c) 2017-2018 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.local.tap.aws.s3; 022 023import java.io.FilterInputStream; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.OutputStream; 027import java.io.PipedInputStream; 028import java.io.PipedOutputStream; 029import java.net.URI; 030import java.net.URISyntaxException; 031import java.util.ArrayList; 032import java.util.Iterator; 033import java.util.List; 034import java.util.Properties; 035import java.util.function.Predicate; 036import java.util.regex.Pattern; 037 038import cascading.CascadingException; 039import cascading.flow.FlowProcess; 040import cascading.property.PropertyUtil; 041import cascading.scheme.FileFormat; 042import cascading.scheme.Scheme; 043import cascading.tap.SinkMode; 044import cascading.tap.Tap; 045import cascading.tap.TapException; 046import cascading.tap.local.PartitionTap; 047import cascading.tap.type.FileType; 048import cascading.tap.type.TapWith; 049import cascading.tuple.TupleEntryCollector; 050import cascading.tuple.TupleEntryIterator; 051import cascading.tuple.TupleEntrySchemeCollector; 052import cascading.tuple.TupleEntrySchemeIterator; 053import cascading.util.CloseableIterator; 054import 
cascading.util.Util; 055import com.amazonaws.ClientConfiguration; 056import com.amazonaws.client.builder.AwsClientBuilder; 057import com.amazonaws.services.s3.AmazonS3; 058import com.amazonaws.services.s3.AmazonS3ClientBuilder; 059import com.amazonaws.services.s3.model.AmazonS3Exception; 060import com.amazonaws.services.s3.model.ObjectMetadata; 061import com.amazonaws.services.s3.model.S3ObjectSummary; 062import com.amazonaws.services.s3.transfer.TransferManager; 063import com.amazonaws.services.s3.transfer.TransferManagerBuilder; 064import com.amazonaws.services.s3.transfer.Upload; 065import com.amazonaws.services.s3.transfer.model.UploadResult; 066import org.slf4j.Logger; 067import org.slf4j.LoggerFactory; 068 069import static cascading.util.Util.isEmpty; 070 071/** 072 * Class S3Tap is a Cascading local-mode {@link Tap} providing read and write access to data stored in Amazon S3 buckets. 073 * <p> 074 * This Tap is not intended to be used with any of the other Cascading planners unless they specify they are local-mode 075 * compatible. 076 * <p> 077 * S3Tap can read a single key, all objects underneath a key-prefix, or all objects under a key-prefix that match 078 * a given globbing pattern. 079 * <p> 080 * See the various constructors for the available access parametrizations. Of note are the constructors that take 081 * a {@link URI} instance. The URI should be in the following format: 082 * {@code s3://[bucket]/<key|key-prefix><?glob>} 083 * <p> 084 * Where bucket is the only required value. The key references a single object, the key-prefix is used to access 085 * a set of objects with a common prefix value. The glob value is used to further narrow the resulting object set. 086 * <p> 087 * The globbing pattern is specified by the {@link java.nio.file.FileSystem#getPathMatcher} method. 088 * <p> 089 * This Tap was designed to allow applications to effectively poll an S3 bucket for new keys to be processed. 
090 * <p> 091 * When used with the {@link S3FileCheckpointer} class, a map of keys last consumed by each bucket will be tracked 092 * on disk, with the map surviving JVM restarts allowing for applications to exit and restart safely without 093 * retrieving duplicate data. 094 * <p> 095 * The {@link S3Checkpointer#commit()} method is only called during a graceful shutdown of the Flow or JVM, but every 096 * consumed key is passed to the S3Checkpointer, so custom implementations can choose to persist the key more 097 * frequently. 098 * <p> 099 * AWS Credentials are handled by {@link com.amazonaws.auth.DefaultAWSCredentialsProviderChain}. 100 */ 101public class S3Tap extends Tap<Properties, InputStream, OutputStream> implements FileType<Properties>, TapWith<Properties, InputStream, OutputStream> 102 { 103 /** Field LOG */ 104 private static final Logger LOG = LoggerFactory.getLogger( S3Tap.class ); 105 106 /** Field SEQUENCE_TOKEN */ 107 public static final String SEQUENCE_TOKEN = "{sequence}"; 108 /** Field MIME_DIRECTORY */ 109 public static final String MIME_DIRECTORY = "application/x-directory"; 110 /** Field DEFAULT_DELIMITER */ 111 public static final String DEFAULT_DELIMITER = "/"; 112 113 /** Field s3Client */ 114 AmazonS3 s3Client; 115 /** Field bucketName */ 116 String bucketName; 117 /** Field key */ 118 String key; 119 /** Field filter */ 120 Predicate<String> filter; 121 /** Field delimiter */ 122 String delimiter = DEFAULT_DELIMITER; 123 /** Field checkpointer */ 124 S3Checkpointer checkpointer; 125 126 private transient ObjectMetadata objectMetadata; 127 private transient TransferManager transferManager; 128 129 /** 130 * Method makeURI creates a new S3 URI from the given parameters. 
131 * 132 * @param bucketName the S3 bucket name 133 * @param keyPrefix the S3 object key or key-prefix 134 * @return an URI instance 135 */ 136 public static URI makeURI( String bucketName, String keyPrefix ) 137 { 138 return makeURI( bucketName, keyPrefix, null ); 139 } 140 141 /** 142 * Method makeURI creates a new S3 URI from the given parameters. 143 * 144 * @param bucketName the S3 bucket name 145 * @param keyPrefix the S3 object key or key-prefix 146 * @param glob the globbing pattern to apply to the keys 147 * @return an URI instance 148 */ 149 public static URI makeURI( String bucketName, String keyPrefix, String glob ) 150 { 151 if( bucketName == null ) 152 throw new IllegalArgumentException( "bucketName may not be null" ); 153 154 try 155 { 156 if( keyPrefix == null ) 157 keyPrefix = "/"; 158 else if( !keyPrefix.startsWith( "/" ) ) 159 keyPrefix = "/" + keyPrefix; 160 161 return new URI( "s3", bucketName, keyPrefix, glob, null ); 162 } 163 catch( URISyntaxException exception ) 164 { 165 throw new IllegalArgumentException( exception.getMessage(), exception ); 166 } 167 } 168 169 /** 170 * Constructor S3Tap creates a new S3Tap instance. 171 * 172 * @param scheme of Scheme 173 * @param bucketName of String 174 */ 175 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName ) 176 { 177 this( scheme, bucketName, null, null, null, SinkMode.KEEP ); 178 } 179 180 /** 181 * Constructor S3Tap creates a new S3Tap instance. 182 * 183 * @param scheme of Scheme 184 * @param bucketName of String 185 * @param key of String 186 */ 187 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key ) 188 { 189 this( scheme, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP ); 190 } 191 192 /** 193 * Constructor S3Tap creates a new S3Tap instance. 
194 * 195 * @param scheme of Scheme 196 * @param bucketName of String 197 * @param key of String 198 * @param delimiter of String 199 */ 200 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter ) 201 { 202 this( scheme, null, null, bucketName, key, delimiter, SinkMode.KEEP ); 203 } 204 205 /** 206 * Constructor S3Tap creates a new S3Tap instance. 207 * 208 * @param scheme of Scheme 209 * @param bucketName of String 210 * @param filter of Predicate 211 */ 212 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, Predicate<String> filter ) 213 { 214 this( scheme, bucketName, null, filter, SinkMode.KEEP ); 215 } 216 217 /** 218 * Constructor S3Tap creates a new S3Tap instance. 219 * 220 * @param scheme of Scheme 221 * @param bucketName of String 222 * @param key of String 223 * @param filter of Predicate 224 */ 225 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, Predicate<String> filter ) 226 { 227 this( scheme, bucketName, key, DEFAULT_DELIMITER, filter, SinkMode.KEEP ); 228 } 229 230 /** 231 * Constructor S3Tap creates a new S3Tap instance. 232 * 233 * @param scheme of Scheme 234 * @param bucketName of String 235 * @param key of String 236 * @param delimiter of String 237 * @param filter of Predicate 238 */ 239 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter, Predicate<String> filter ) 240 { 241 this( scheme, null, null, bucketName, key, delimiter, filter, SinkMode.KEEP ); 242 } 243 244 /** 245 * Constructor S3Tap creates a new S3Tap instance. 
246 * 247 * @param scheme of Scheme 248 * @param s3Client of AmazonS3 249 * @param bucketName of String 250 */ 251 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName ) 252 { 253 this( scheme, s3Client, bucketName, null, SinkMode.KEEP ); 254 } 255 256 /** 257 * Constructor S3Tap creates a new S3Tap instance. 258 * 259 * @param scheme of Scheme 260 * @param s3Client of AmazonS3 261 * @param bucketName of String 262 * @param key of String 263 */ 264 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key ) 265 { 266 this( scheme, s3Client, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP ); 267 } 268 269 /** 270 * Constructor S3Tap creates a new S3Tap instance. 271 * 272 * @param scheme of Scheme 273 * @param s3Client of AmazonS3 274 * @param bucketName of String 275 * @param key of String 276 * @param delimiter of String 277 */ 278 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter ) 279 { 280 this( scheme, s3Client, bucketName, key, delimiter, null, SinkMode.KEEP ); 281 } 282 283 /** 284 * Constructor S3Tap creates a new S3Tap instance. 285 * 286 * @param scheme of Scheme 287 * @param s3Client of AmazonS3 288 * @param bucketName of String 289 * @param key of String 290 * @param delimiter of String 291 * @param filter of Predicate 292 */ 293 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, Predicate<String> filter ) 294 { 295 this( scheme, s3Client, null, bucketName, key, delimiter, filter, SinkMode.KEEP ); 296 } 297 298 /** 299 * Constructor S3Tap creates a new S3Tap instance. 
300 * 301 * @param scheme of Scheme 302 * @param checkpointer of S3Checkpointer 303 * @param bucketName of String 304 */ 305 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName ) 306 { 307 this( scheme, checkpointer, bucketName, null, null, null, SinkMode.KEEP ); 308 } 309 310 /** 311 * Constructor S3Tap creates a new S3Tap instance. 312 * 313 * @param scheme of Scheme 314 * @param checkpointer of S3Checkpointer 315 * @param bucketName of String 316 * @param key of String 317 */ 318 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key ) 319 { 320 this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP ); 321 } 322 323 /** 324 * Constructor S3Tap creates a new S3Tap instance. 325 * 326 * @param scheme of Scheme 327 * @param checkpointer of S3Checkpointer 328 * @param bucketName of String 329 * @param key of String 330 * @param delimiter of String 331 */ 332 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter ) 333 { 334 this( scheme, null, checkpointer, bucketName, key, delimiter, SinkMode.KEEP ); 335 } 336 337 /** 338 * Constructor S3Tap creates a new S3Tap instance. 339 * 340 * @param scheme of Scheme 341 * @param checkpointer of S3Checkpointer 342 * @param bucketName of String 343 * @param filter of Predicate 344 */ 345 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, Predicate<String> filter ) 346 { 347 this( scheme, checkpointer, bucketName, null, filter, SinkMode.KEEP ); 348 } 349 350 /** 351 * Constructor S3Tap creates a new S3Tap instance. 
352 * 353 * @param scheme of Scheme 354 * @param checkpointer of S3Checkpointer 355 * @param bucketName of String 356 * @param key of String 357 * @param filter of Predicate 358 */ 359 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, Predicate<String> filter ) 360 { 361 this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, filter, SinkMode.KEEP ); 362 } 363 364 /** 365 * Constructor S3Tap creates a new S3Tap instance. 366 * 367 * @param scheme of Scheme 368 * @param checkpointer of S3Checkpointer 369 * @param bucketName of String 370 * @param key of String 371 * @param delimiter of String 372 * @param filter of Predicate 373 */ 374 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter ) 375 { 376 this( scheme, null, checkpointer, bucketName, key, delimiter, filter, SinkMode.KEEP ); 377 } 378 379 /** 380 * Constructor S3Tap creates a new S3Tap instance. 381 * 382 * @param scheme of Scheme 383 * @param s3Client of AmazonS3 384 * @param checkpointer of S3Checkpointer 385 * @param bucketName of String 386 */ 387 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName ) 388 { 389 this( scheme, s3Client, checkpointer, bucketName, null, SinkMode.KEEP ); 390 } 391 392 /** 393 * Constructor S3Tap creates a new S3Tap instance. 
394 * 395 * @param scheme of Scheme 396 * @param s3Client of AmazonS3 397 * @param checkpointer of S3Checkpointer 398 * @param bucketName of String 399 * @param key of String 400 */ 401 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key ) 402 { 403 this( scheme, s3Client, checkpointer, bucketName, key, DEFAULT_DELIMITER, SinkMode.KEEP ); 404 } 405 406 /** 407 * Constructor S3Tap creates a new S3Tap instance. 408 * 409 * @param scheme of Scheme 410 * @param s3Client of AmazonS3 411 * @param checkpointer of S3Checkpointer 412 * @param bucketName of String 413 * @param key of String 414 * @param delimiter of String 415 */ 416 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter ) 417 { 418 this( scheme, s3Client, checkpointer, bucketName, key, delimiter, null, SinkMode.KEEP ); 419 } 420 421 /** 422 * Constructor S3Tap creates a new S3Tap instance. 423 * 424 * @param scheme of Scheme 425 * @param s3Client of AmazonS3 426 * @param checkpointer of S3Checkpointer 427 * @param bucketName of String 428 * @param key of String 429 * @param delimiter of String 430 * @param filter of Predicate 431 */ 432 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter ) 433 { 434 this( scheme, s3Client, checkpointer, bucketName, key, delimiter, filter, SinkMode.KEEP ); 435 } 436 437 /** 438 * Constructor S3Tap creates a new S3Tap instance. 
439 * 440 * @param scheme of Scheme 441 * @param bucketName of String 442 */ 443 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, SinkMode sinkMode ) 444 { 445 this( scheme, bucketName, null, null, null, sinkMode ); 446 } 447 448 /** 449 * Constructor S3Tap creates a new S3Tap instance. 450 * 451 * @param scheme of Scheme 452 * @param bucketName of String 453 * @param key of String 454 */ 455 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, SinkMode sinkMode ) 456 { 457 this( scheme, bucketName, key, DEFAULT_DELIMITER, sinkMode ); 458 } 459 460 /** 461 * Constructor S3Tap creates a new S3Tap instance. 462 * 463 * @param scheme of Scheme 464 * @param bucketName of String 465 * @param key of String 466 * @param delimiter of String 467 */ 468 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter, SinkMode sinkMode ) 469 { 470 this( scheme, null, null, bucketName, key, delimiter, sinkMode ); 471 } 472 473 /** 474 * Constructor S3Tap creates a new S3Tap instance. 475 * 476 * @param scheme of Scheme 477 * @param bucketName of String 478 * @param filter of Predicate 479 */ 480 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, Predicate<String> filter, SinkMode sinkMode ) 481 { 482 this( scheme, bucketName, null, filter, sinkMode ); 483 } 484 485 /** 486 * Constructor S3Tap creates a new S3Tap instance. 487 * 488 * @param scheme of Scheme 489 * @param bucketName of String 490 * @param key of String 491 * @param filter of Predicate 492 */ 493 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, Predicate<String> filter, SinkMode sinkMode ) 494 { 495 this( scheme, bucketName, key, DEFAULT_DELIMITER, filter, sinkMode ); 496 } 497 498 /** 499 * Constructor S3Tap creates a new S3Tap instance. 
500 * 501 * @param scheme of Scheme 502 * @param bucketName of String 503 * @param key of String 504 * @param delimiter of String 505 * @param filter of Predicate 506 * @param sinkMode of SinkMode 507 */ 508 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode ) 509 { 510 this( scheme, null, null, bucketName, key, delimiter, filter, sinkMode ); 511 } 512 513 /** 514 * Constructor S3Tap creates a new S3Tap instance. 515 * 516 * @param scheme of Scheme 517 * @param s3Client of AmazonS3 518 * @param bucketName of String 519 * @param sinkMode of SinkMode 520 */ 521 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, SinkMode sinkMode ) 522 { 523 this( scheme, s3Client, bucketName, null, sinkMode ); 524 } 525 526 /** 527 * Constructor S3Tap creates a new S3Tap instance. 528 * 529 * @param scheme of Scheme 530 * @param s3Client of AmazonS3 531 * @param bucketName of String 532 * @param key of String 533 * @param sinkMode of SinkMode 534 */ 535 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, SinkMode sinkMode ) 536 { 537 this( scheme, s3Client, bucketName, key, DEFAULT_DELIMITER, sinkMode ); 538 } 539 540 /** 541 * Constructor S3Tap creates a new S3Tap instance. 542 * 543 * @param scheme of Scheme 544 * @param s3Client of AmazonS3 545 * @param bucketName of String 546 * @param key of String 547 * @param delimiter of String 548 * @param sinkMode of SinkMode 549 */ 550 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, SinkMode sinkMode ) 551 { 552 this( scheme, s3Client, bucketName, key, delimiter, null, sinkMode ); 553 } 554 555 /** 556 * Constructor S3Tap creates a new S3Tap instance. 
557 * 558 * @param scheme of Scheme 559 * @param s3Client of AmazonS3 560 * @param bucketName of String 561 * @param key of String 562 * @param delimiter of String 563 * @param filter of Predicate 564 * @param sinkMode of SinkMode 565 */ 566 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode ) 567 { 568 this( scheme, s3Client, null, bucketName, key, delimiter, filter, sinkMode ); 569 } 570 571 /** 572 * Constructor S3Tap creates a new S3Tap instance. 573 * 574 * @param scheme of Scheme 575 * @param checkpointer of S3Checkpointer 576 * @param bucketName of String 577 * @param sinkMode of SinkMode 578 */ 579 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, SinkMode sinkMode ) 580 { 581 this( scheme, checkpointer, bucketName, null, null, null, sinkMode ); 582 } 583 584 /** 585 * Constructor S3Tap creates a new S3Tap instance. 586 * 587 * @param scheme of Scheme 588 * @param checkpointer of S3Checkpointer 589 * @param bucketName of String 590 * @param key of String 591 * @param sinkMode of SinkMode 592 */ 593 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, SinkMode sinkMode ) 594 { 595 this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, sinkMode ); 596 } 597 598 /** 599 * Constructor S3Tap creates a new S3Tap instance. 
600 * 601 * @param scheme of Scheme 602 * @param checkpointer of S3Checkpointer 603 * @param bucketName of String 604 * @param key of String 605 * @param delimiter of String 606 * @param sinkMode of SinkMode 607 */ 608 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, SinkMode sinkMode ) 609 { 610 this( scheme, null, checkpointer, bucketName, key, delimiter, sinkMode ); 611 } 612 613 /** 614 * Constructor S3Tap creates a new S3Tap instance. 615 * 616 * @param scheme of Scheme 617 * @param checkpointer of S3Checkpointer 618 * @param bucketName of String 619 * @param filter of Predicate 620 * @param sinkMode of SinkMode 621 */ 622 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, Predicate<String> filter, SinkMode sinkMode ) 623 { 624 this( scheme, checkpointer, bucketName, null, filter, sinkMode ); 625 } 626 627 /** 628 * Constructor S3Tap creates a new S3Tap instance. 629 * 630 * @param scheme of Scheme 631 * @param checkpointer of S3Checkpointer 632 * @param bucketName of String 633 * @param key of String 634 * @param filter of Predicate 635 * @param sinkMode of SinkMode 636 */ 637 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, Predicate<String> filter, SinkMode sinkMode ) 638 { 639 this( scheme, checkpointer, bucketName, key, DEFAULT_DELIMITER, filter, sinkMode ); 640 } 641 642 /** 643 * Constructor S3Tap creates a new S3Tap instance. 
644 * 645 * @param scheme of Scheme 646 * @param checkpointer of S3Checkpointer 647 * @param bucketName of String 648 * @param key of String 649 * @param delimiter of String 650 * @param filter of Predicate 651 * @param sinkMode of SinkMode 652 */ 653 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode ) 654 { 655 this( scheme, null, checkpointer, bucketName, key, delimiter, filter, sinkMode ); 656 } 657 658 /** 659 * Constructor S3Tap creates a new S3Tap instance. 660 * 661 * @param scheme of Scheme 662 * @param s3Client of AmazonS3 663 * @param checkpointer of S3Checkpointer 664 * @param bucketName of String 665 * @param sinkMode of SinkMode 666 */ 667 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, SinkMode sinkMode ) 668 { 669 this( scheme, s3Client, checkpointer, bucketName, null, sinkMode ); 670 } 671 672 /** 673 * Constructor S3Tap creates a new S3Tap instance. 674 * 675 * @param scheme of Scheme 676 * @param s3Client of AmazonS3 677 * @param checkpointer of S3Checkpointer 678 * @param bucketName of String 679 * @param key of String 680 * @param sinkMode of SinkMode 681 */ 682 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, SinkMode sinkMode ) 683 { 684 this( scheme, s3Client, checkpointer, bucketName, key, DEFAULT_DELIMITER, sinkMode ); 685 } 686 687 /** 688 * Constructor S3Tap creates a new S3Tap instance. 
689 * 690 * @param scheme of Scheme 691 * @param s3Client of AmazonS3 692 * @param checkpointer of S3Checkpointer 693 * @param bucketName of String 694 * @param key of String 695 * @param delimiter of String 696 * @param sinkMode of SinkMode 697 */ 698 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, SinkMode sinkMode ) 699 { 700 this( scheme, s3Client, checkpointer, bucketName, key, delimiter, null, sinkMode ); 701 } 702 703 /** 704 * Constructor S3Tap creates a new S3Tap instance. 705 * 706 * @param scheme of Scheme 707 * @param s3Client of AmazonS3 708 * @param checkpointer of S3Checkpointer 709 * @param bucketName of String 710 * @param key of String 711 * @param delimiter of String 712 * @param filter of Predicate 713 * @param sinkMode of SinkMode 714 */ 715 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode ) 716 { 717 super( scheme, sinkMode ); 718 this.s3Client = s3Client; 719 this.checkpointer = checkpointer; 720 this.bucketName = bucketName; 721 722 if( isEmpty( this.bucketName ) ) 723 throw new IllegalArgumentException( "bucket name may not be null or empty" ); 724 725 this.key = key; 726 this.delimiter = delimiter; 727 this.filter = filter; 728 } 729 730 /** 731 * Constructor S3Tap creates a new S3Tap instance. 732 * 733 * @param scheme of Scheme 734 * @param identifier of URI 735 */ 736 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, URI identifier ) 737 { 738 this( scheme, null, null, identifier, SinkMode.KEEP ); 739 } 740 741 /** 742 * Constructor S3Tap creates a new S3Tap instance. 
743 * 744 * @param scheme of Scheme 745 * @param s3Client of AmazonS3 746 * @param identifier of URI 747 */ 748 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, URI identifier ) 749 { 750 this( scheme, s3Client, null, identifier, SinkMode.KEEP ); 751 } 752 753 /** 754 * Constructor S3Tap creates a new S3Tap instance. 755 * 756 * @param scheme of Scheme 757 * @param checkpointer of S3Checkpointer 758 * @param identifier of URI 759 */ 760 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, URI identifier ) 761 { 762 this( scheme, null, checkpointer, identifier, SinkMode.KEEP ); 763 } 764 765 /** 766 * Constructor S3Tap creates a new S3Tap instance. 767 * 768 * @param scheme of Scheme 769 * @param s3Client of AmazonS3 770 * @param checkpointer of S3Checkpointer 771 * @param identifier of URI 772 */ 773 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, URI identifier ) 774 { 775 this( scheme, s3Client, checkpointer, identifier, SinkMode.KEEP ); 776 } 777 778 /** 779 * Constructor S3Tap creates a new S3Tap instance. 780 * 781 * @param scheme of Scheme 782 * @param identifier of URI 783 * @param sinkMode of SinkMode 784 */ 785 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, URI identifier, SinkMode sinkMode ) 786 { 787 this( scheme, null, null, identifier, sinkMode ); 788 } 789 790 /** 791 * Constructor S3Tap creates a new S3Tap instance. 792 * 793 * @param scheme of Scheme 794 * @param s3Client of AmazonS3 795 * @param identifier of URI 796 * @param sinkMode of SinkMode 797 */ 798 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, URI identifier, SinkMode sinkMode ) 799 { 800 this( scheme, s3Client, null, identifier, sinkMode ); 801 } 802 803 /** 804 * Constructor S3Tap creates a new S3Tap instance. 
805 * 806 * @param scheme of Scheme 807 * @param checkpointer of S3Checkpointer 808 * @param identifier of URI 809 * @param sinkMode of SinkMode 810 */ 811 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, S3Checkpointer checkpointer, URI identifier, SinkMode sinkMode ) 812 { 813 this( scheme, null, checkpointer, identifier, sinkMode ); 814 } 815 816 /** 817 * Constructor S3Tap creates a new S3Tap instance. 818 * 819 * @param scheme of Scheme 820 * @param s3Client of AmazonS3 821 * @param checkpointer of S3Checkpointer 822 * @param identifier of URI 823 * @param sinkMode of SinkMode 824 */ 825 public S3Tap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, S3Checkpointer checkpointer, URI identifier, SinkMode sinkMode ) 826 { 827 super( scheme, sinkMode ); 828 this.s3Client = s3Client; 829 this.checkpointer = checkpointer; 830 831 if( identifier == null ) 832 throw new IllegalArgumentException( "identifier may not be null" ); 833 834 if( !identifier.getScheme().equalsIgnoreCase( "s3" ) ) 835 throw new IllegalArgumentException( "identifier does not have s3 scheme" ); 836 837 this.bucketName = getBucketNameFor( identifier ); 838 839 if( isEmpty( this.bucketName ) ) 840 throw new IllegalArgumentException( "bucket name may not be null or empty" + identifier ); 841 842 this.key = cleanKey( identifier ); 843 844 if( identifier.getQuery() != null ) 845 filter = globPredicate( identifier.getQuery() ); 846 } 847 848 protected String getBucketNameFor( URI identifier ) 849 { 850 String authority = identifier.getAuthority(); 851 852 if( isEmpty( authority ) ) 853 throw new IllegalArgumentException( "identifier must have an authority: " + identifier ); 854 855 int pos = authority.indexOf( '@' ); 856 857 if( pos != -1 ) 858 return authority.substring( pos + 1 ); 859 860 return authority; 861 } 862 863 private static Predicate<String> globPredicate( String glob ) 864 { 865 String regex = getRegexForGlob( glob ); 866 Pattern 
pattern = Pattern.compile( regex ); 867 868 return string -> pattern.matcher( string ).matches(); 869 } 870 871 private static String getRegexForGlob( String glob ) 872 { 873 return (String) Util.invokeStaticMethod( 874 "sun.nio.fs.Globs", 875 "toUnixRegexPattern", 876 new Object[]{glob}, 877 new Class[]{String.class} 878 ); 879 } 880 881 @Override 882 public TapWith<Properties, InputStream, OutputStream> withScheme( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme ) 883 { 884 // don't lazily create s3Client 885 return create( scheme, s3Client, getBucketName(), getKey(), getDelimiter(), getFilter(), getSinkMode() ); 886 } 887 888 @Override 889 public TapWith<Properties, InputStream, OutputStream> withChildIdentifier( String identifier ) 890 { 891 URI uri; 892 893 if( identifier.startsWith( "s3://" ) ) 894 uri = URI.create( identifier ); 895 else if( identifier.startsWith( getBucketName() ) ) 896 uri = makeURI( identifier, null ); 897 else 898 uri = makeURI( getBucketName(), getKey() + ( identifier.startsWith( delimiter ) ? 
identifier : delimiter + identifier ) ); 899 900 // don't lazily create s3Client 901 return create( getScheme(), s3Client, uri, getSinkMode() ); 902 } 903 904 protected TapWith<Properties, InputStream, OutputStream> create( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, URI identifier, SinkMode sinkMode ) 905 { 906 try 907 { 908 return Util.newInstance( getClass(), scheme, s3Client, identifier, sinkMode ); 909 } 910 catch( CascadingException exception ) 911 { 912 throw new TapException( "unable to create a new instance of: " + getClass().getName(), exception ); 913 } 914 } 915 916 @Override 917 public TapWith<Properties, InputStream, OutputStream> withSinkMode( SinkMode sinkMode ) 918 { 919 // don't lazily create s3Client 920 return create( getScheme(), s3Client, getBucketName(), getKey(), getDelimiter(), getFilter(), sinkMode ); 921 } 922 923 protected TapWith<Properties, InputStream, OutputStream> create( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, AmazonS3 s3Client, String bucketName, String key, String delimiter, Predicate<String> filter, SinkMode sinkMode ) 924 { 925 try 926 { 927 return Util.newInstance( getClass(), scheme, s3Client, bucketName, key, delimiter, filter, sinkMode ); 928 } 929 catch( CascadingException exception ) 930 { 931 throw new TapException( "unable to create a new instance of: " + getClass().getName(), exception ); 932 } 933 } 934 935 protected String cleanKey( URI identifier ) 936 { 937 String path = identifier.normalize().getPath(); 938 939 if( path.startsWith( "/" ) ) 940 path = path.substring( 1 ); 941 942 return path; 943 } 944 945 protected AmazonS3 getS3Client( Properties properties ) 946 { 947 // return provided client 948 if( s3Client != null ) 949 return s3Client; 950 951 AmazonS3ClientBuilder standard = AmazonS3ClientBuilder.standard(); 952 953 if( properties != null ) 954 { 955 String endpoint = properties.getProperty( S3TapProps.S3_ENDPOINT ); 956 String region = 
properties.getProperty( S3TapProps.S3_REGION, "us-east-1" ); 957 958 if( properties.containsKey( S3TapProps.S3_PROXY_HOST ) ) 959 { 960 ClientConfiguration config = new ClientConfiguration() 961 .withProxyHost( properties.getProperty( S3TapProps.S3_PROXY_HOST ) ) 962 .withProxyPort( PropertyUtil.getIntProperty( properties, S3TapProps.S3_PROXY_PORT, -1 ) ); 963 964 standard.withClientConfiguration( config ); 965 } 966 967 if( endpoint != null ) 968 standard.withEndpointConfiguration( new AwsClientBuilder.EndpointConfiguration( endpoint, region ) ); 969 else 970 standard.setRegion( region ); 971 972 if( Boolean.parseBoolean( properties.getProperty( S3TapProps.S3_PATH_STYLE_ACCESS, "false" ) ) ) 973 standard.enablePathStyleAccess(); 974 } 975 976 return standard.build(); 977 } 978 979 /** 980 * Method getCheckpointer returns the checkpointer of this S3Tap object. 981 * 982 * @return the checkpointer (type S3Checkpointer) of this S3Tap object. 983 */ 984 public S3Checkpointer getCheckpointer() 985 { 986 return checkpointer; 987 } 988 989 /** 990 * Method getBucketName returns the bucketName of this S3Tap object. 991 * 992 * @return the bucketName (type String) of this S3Tap object. 993 */ 994 public String getBucketName() 995 { 996 return bucketName; 997 } 998 999 /** 1000 * Method getKey returns the key of this S3Tap object. 1001 * 1002 * @return the key (type String) of this S3Tap object. 1003 */ 1004 public String getKey() 1005 { 1006 return key; 1007 } 1008 1009 protected String getMarker() 1010 { 1011 if( checkpointer != null ) 1012 return checkpointer.getLastKey( getBucketName() ); 1013 1014 return null; 1015 } 1016 1017 protected void setLastMarker( String marker ) 1018 { 1019 if( checkpointer != null ) 1020 checkpointer.setLastKey( getBucketName(), marker ); 1021 } 1022 1023 protected void commitMarker() 1024 { 1025 if( checkpointer != null ) 1026 checkpointer.commit(); 1027 } 1028 1029 /** 1030 * Method getFilter returns the filter of this S3Tap object. 
1031 * 1032 * @return the filter (type Predicate) of this S3Tap object. 1033 */ 1034 public Predicate<String> getFilter() 1035 { 1036 return filter; 1037 } 1038 1039 /** 1040 * Method getDelimiter returns the delimiter of this S3Tap object. 1041 * 1042 * @return the delimiter (type String) of this S3Tap object. 1043 */ 1044 public String getDelimiter() 1045 { 1046 return delimiter; 1047 } 1048 1049 @Override 1050 public String getIdentifier() 1051 { 1052 return makeStringIdentifier( getBucketName(), getKey() ); 1053 } 1054 1055 @Override 1056 public String getFullIdentifier( Properties conf ) 1057 { 1058 return getIdentifier(); 1059 } 1060 1061 @Override 1062 public boolean deleteResource( Properties conf ) throws IOException 1063 { 1064 AmazonS3 s3Client = getS3Client( conf ); 1065 1066 try 1067 { 1068 s3Client.deleteObject( getBucketName(), getKey() ); 1069 } 1070 catch( AmazonS3Exception exception ) 1071 { 1072 throw handleException( s3Client, exception ); 1073 } 1074 1075 return true; 1076 } 1077 1078 @Override 1079 public boolean createResource( Properties conf ) throws IOException 1080 { 1081 AmazonS3 s3Client = getS3Client( conf ); 1082 1083 try 1084 { 1085 s3Client.putObject( getBucketName(), getKey(), "" ); 1086 } 1087 catch( AmazonS3Exception exception ) 1088 { 1089 throw handleException( s3Client, exception ); 1090 } 1091 1092 return true; 1093 } 1094 1095 protected ObjectMetadata getObjectMetadata( Properties conf ) 1096 { 1097 try 1098 { 1099 if( objectMetadata == null ) 1100 objectMetadata = getS3Client( conf ).getObjectMetadata( getBucketName(), getKey() ); 1101 1102 return objectMetadata; 1103 } 1104 catch( AmazonS3Exception exception ) 1105 { 1106 throw handleException( getS3Client( conf ), exception ); 1107 } 1108 } 1109 1110 private class CheckedFilterInputStream extends FilterInputStream 1111 { 1112 public CheckedFilterInputStream( InputStream inputStream ) 1113 { 1114 super( inputStream ); 1115 } 1116 } 1117 1118 @Override 1119 public 
TupleEntryIterator openForRead( FlowProcess<? extends Properties> flowProcess, InputStream input ) throws IOException 1120 { 1121 AmazonS3 s3Client = getS3Client( flowProcess.getConfig() ); 1122 1123 final String[] identifier = new String[ 1 ]; 1124 1125 CloseableIterator<InputStream> iterator = new CloseableIterator<InputStream>() 1126 { 1127 S3Iterable iterable = S3Iterable.iterable( s3Client, getBucketName(), getKey() ) 1128 .withFilter( getFilter() ) 1129 .withMarker( getMarker() ); 1130 1131 Iterator<S3ObjectSummary> iterator = iterable.iterator(); 1132 InputStream lastInputStream; 1133 1134 @Override 1135 public boolean hasNext() 1136 { 1137 return iterator.hasNext(); 1138 } 1139 1140 @Override 1141 public InputStream next() 1142 { 1143 safeClose(); 1144 1145 S3ObjectSummary objectSummary = iterator.next(); 1146 1147 identifier[ 0 ] = makeStringIdentifier( objectSummary.getBucketName(), objectSummary.getKey() ); 1148 1149 flowProcess.getFlowProcessContext().setSourcePath( identifier[ 0 ] ); 1150 1151 if( LOG.isDebugEnabled() ) 1152 LOG.debug( "s3 retrieving: {}/{}, with size: {}", objectSummary.getBucketName(), objectSummary.getKey(), objectSummary.getSize() ); 1153 1154 // getObject does not seem to fill the InputStream, nor does the InputStream support marking 1155 // may make sense to wrap this iterator in a iterate ahead iterator that attempts to pre-fetch objects in a different thread 1156 lastInputStream = new CheckedFilterInputStream( s3Client.getObject( objectSummary.getBucketName(), objectSummary.getKey() ).getObjectContent() ) 1157 { 1158 @Override 1159 public void close() throws IOException 1160 { 1161 setLastMarker( objectSummary.getKey() ); 1162 super.close(); 1163 } 1164 }; 1165 1166 return lastInputStream; 1167 } 1168 1169 private void safeClose() 1170 { 1171 try 1172 { 1173 if( lastInputStream != null ) 1174 lastInputStream.close(); 1175 1176 lastInputStream = null; 1177 } 1178 catch( IOException exception ) 1179 { 1180 // do nothing 1181 } 
1182 } 1183 1184 @Override 1185 public void close() 1186 { 1187 safeClose(); 1188 commitMarker(); 1189 } 1190 }; 1191 1192 return new TupleEntrySchemeIterator<Properties, InputStream>( flowProcess, this, getScheme(), iterator, () -> identifier[ 0 ] ); 1193 } 1194 1195 @Override 1196 public TupleEntryCollector openForWrite( FlowProcess<? extends Properties> flowProcess, OutputStream outputStream ) throws IOException 1197 { 1198 AmazonS3 s3Client = getS3Client( flowProcess.getConfig() ); 1199 1200 if( !s3Client.doesBucketExistV2( getBucketName() ) ) 1201 s3Client.createBucket( getBucketName() ); 1202 1203 PipedInputStream pipedInputStream = new PipedInputStream(); 1204 PipedOutputStream pipedOutputStream = new PipedOutputStream( pipedInputStream ); 1205 1206 TransferManager transferManager = getTransferManager( s3Client ); 1207 1208 ObjectMetadata metadata = new ObjectMetadata(); 1209 1210 final String key = resolveKey( flowProcess, getKey() ); 1211 1212 LOG.info( "starting async upload: {}", makeStringIdentifier( getBucketName(), key ) ); 1213 1214 Upload upload = transferManager.upload( getBucketName(), key, pipedInputStream, metadata ); 1215 1216 return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, this, getScheme(), pipedOutputStream, makeStringIdentifier( getBucketName(), key ) ) 1217 { 1218 @Override 1219 public void close() 1220 { 1221 super.close(); // flushes and closes output 1222 1223 try 1224 { 1225 UploadResult uploadResult = upload.waitForUploadResult(); 1226 1227 if( uploadResult != null ) 1228 { 1229 if( LOG.isDebugEnabled() ) 1230 LOG.debug( "completed upload: {}, with key: {}", getIdentifier(), uploadResult.getKey() ); 1231 } 1232 } 1233 catch( InterruptedException exception ) 1234 { 1235 // ignore 1236 } 1237 finally 1238 { 1239 transferManager.shutdownNow( false ); 1240 } 1241 } 1242 }; 1243 } 1244 1245 protected TransferManager getTransferManager( AmazonS3 s3Client ) 1246 { 1247 if( transferManager == null ) 1248 
transferManager = TransferManagerBuilder.standard().withS3Client( s3Client ).build(); 1249 1250 return transferManager; 1251 } 1252 1253 protected String resolveKey( FlowProcess<? extends Properties> flowProcess, String key ) 1254 { 1255 int partNum = flowProcess.getIntegerProperty( PartitionTap.PART_NUM_PROPERTY, 0 ); 1256 1257 key = key.replace( SEQUENCE_TOKEN, String.format( "%05d", partNum ) ); 1258 1259 if( getScheme() instanceof FileFormat ) 1260 return key + "." + ( (FileFormat) getScheme() ).getExtension(); 1261 1262 return key; 1263 } 1264 1265 @Override 1266 public boolean resourceExists( Properties conf ) throws IOException 1267 { 1268 AmazonS3 s3Client = getS3Client( conf ); 1269 1270 try 1271 { 1272 if( getKey() == null ) 1273 return s3Client.doesBucketExistV2( getBucketName() ); 1274 1275 return s3Client.doesObjectExist( getBucketName(), getKey() ); 1276 } 1277 catch( AmazonS3Exception exception ) 1278 { 1279 throw handleException( s3Client, exception ); 1280 } 1281 } 1282 1283 protected AmazonS3Exception handleException( AmazonS3 s3Client, AmazonS3Exception exception ) 1284 { 1285 if( exception.getStatusCode() == 400 ) 1286 { 1287 LOG.error( "s3 request failed, try changing the AWS Region from: {}, using property: {}", s3Client.getRegionName(), S3TapProps.S3_REGION, exception ); 1288 } 1289 1290 return exception; 1291 } 1292 1293 @Override 1294 public long getModifiedTime( Properties conf ) throws IOException 1295 { 1296 return getObjectMetadata( conf ).getLastModified().getTime(); 1297 } 1298 1299 @Override 1300 public boolean isDirectory( FlowProcess<? 
extends Properties> flowProcess ) throws IOException 1301 { 1302 return MIME_DIRECTORY.equalsIgnoreCase( getObjectMetadata( flowProcess.getConfig() ).getContentType() ); 1303 } 1304 1305 @Override 1306 public boolean isDirectory( Properties conf ) throws IOException 1307 { 1308 return isDirectory( FlowProcess.nullFlowProcess() ); 1309 } 1310 1311 @Override 1312 public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess ) throws IOException 1313 { 1314 return getChildIdentifiers( flowProcess.getConfig() ); 1315 } 1316 1317 @Override 1318 public String[] getChildIdentifiers( Properties conf ) throws IOException 1319 { 1320 return getChildIdentifiers( conf, 1, false ); 1321 } 1322 1323 @Override 1324 public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess, int depth, boolean fullyQualified ) throws IOException 1325 { 1326 return getChildIdentifiers( flowProcess.getConfig(), depth, fullyQualified ); 1327 } 1328 1329 @Override 1330 public String[] getChildIdentifiers( Properties conf, int depth, boolean fullyQualified ) throws IOException 1331 { 1332 if( !resourceExists( conf ) ) 1333 return new String[ 0 ]; 1334 1335 S3Iterable objects = S3Iterable.iterable( getS3Client( conf ), getBucketName(), getKey() ) 1336 .withDelimiter( getDelimiter() ) 1337 .withMaxDepth( depth ) 1338 .withFilter( getFilter() ) 1339 .withMarker( getMarker() ); 1340 1341 Iterator<S3ObjectSummary> iterator = objects.iterator(); 1342 1343 List<String> results = new ArrayList<>(); 1344 1345 while( iterator.hasNext() ) 1346 results.add( makePath( iterator, fullyQualified ) ); 1347 1348 return results.toArray( new String[ results.size() ] ); 1349 } 1350 1351 protected String makePath( Iterator<S3ObjectSummary> iterator, boolean fullyQualified ) 1352 { 1353 String key = iterator.next().getKey(); 1354 1355 if( fullyQualified ) 1356 return makeStringIdentifier( getBucketName(), key ); 1357 1358 return key.substring( getKey().length() ); 1359 } 1360 1361 
@Override
  public long getSize( FlowProcess<? extends Properties> flowProcess ) throws IOException
    {
    // the flow process only contributes its configuration
    Properties conf = flowProcess.getConfig();

    return getSize( conf );
    }

  /**
   * Method getSize returns zero when this tap is reported to be a directory, otherwise the instance
   * length found in the object metadata.
   *
   * @param conf the properties used to look up the object
   * @return the size of the object, or zero for a directory
   * @throws IOException declared by the overridden method
   */
  @Override
  public long getSize( Properties conf ) throws IOException
    {
    return isDirectory( conf ) ? 0 : getObjectMetadata( conf ).getInstanceLength();
    }

  /**
   * Method makeStringIdentifier formats an {@code s3://} identifier from the given bucket name and key prefix.
   * An empty key prefix yields the bucket identifier with a trailing slash.
   *
   * @param bucketName the bucket name
   * @param keyPrefix  the key or key-prefix, may be empty
   * @return the formatted identifier
   */
  protected static String makeStringIdentifier( String bucketName, String keyPrefix )
    {
    return isEmpty( keyPrefix )
      ? String.format( "s3://%s/", bucketName )
      : String.format( "s3://%s/%s", bucketName, keyPrefix );
    }
  }