src/main/java/org/embulk/input/s3/FileList.java in embulk-input-s3-0.2.4 vs src/main/java/org/embulk/input/s3/FileList.java in embulk-input-s3-0.2.5

- old
+ new

@@ -3,10 +3,12 @@
 import java.util.List;
 import java.util.AbstractList;
 import java.util.ArrayList;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.BufferedOutputStream;
 import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
@@ -27,10 +29,14 @@
 // this class should be moved to embulk-core
 public class FileList
 {
     public interface Task
     {
+        @Config("path_match_pattern")
+        @ConfigDefault("\".*\"")
+        String getPathMatchPattern();
+
         @Config("total_file_count_limit")
         @ConfigDefault("2147483647")
         int getTotalFileCountLimit();
     }
 
@@ -61,21 +67,25 @@
         private final OutputStream stream;
         private final List<Entry> entries = new ArrayList<>();
         private String last = null;
 
         private int limitCount = Integer.MAX_VALUE;
+        private Pattern pathMatchPattern;
+
         private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
 
         public Builder(Task task)
         {
             this();
             this.limitCount = task.getTotalFileCountLimit();
+            this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
         }
 
         public Builder(ConfigSource config)
         {
             this();
+            this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
             this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
         }
 
         public Builder()
         {
@@ -92,29 +102,37 @@
         {
             this.limitCount = limitCount;
             return this;
         }
 
+        public Builder pathMatchPattern(String pattern)
+        {
+            this.pathMatchPattern = Pattern.compile(pattern);
+            return this;
+        }
+
         public int size()
         {
             return entries.size();
         }
 
         public boolean needsMore()
         {
             return size() < limitCount;
         }
 
+        // returns true if this file is used
         public synchronized boolean add(String path, long size)
         {
             // TODO throw IllegalStateException if stream is already closed
             if (!needsMore()) {
                 return false;
             }
 
-            // TODO in the future, support some other filtering parameters (file name suffix filter, regex filter, etc)
-            // and return false if filtered out.
+            if (!pathMatchPattern.matcher(path).matches()) {
+                return false;
+            }
 
             int index = entries.size();
             entries.add(new Entry(index, size));
 
             byte[] data = path.getBytes(StandardCharsets.UTF_8);