src/main/java/org/embulk/input/s3/FileList.java in embulk-input-s3-0.2.4 vs src/main/java/org/embulk/input/s3/FileList.java in embulk-input-s3-0.2.5
- old
+ new
@@ -3,10 +3,12 @@
import java.util.List;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
@@ -27,10 +29,14 @@
// this class should be moved to embulk-core
public class FileList
{
// Configuration options read by FileList.Builder.
//  - path_match_pattern: regular expression that Builder compiles; add() keeps a path
//    only when the whole path matches it. The default is ".*" (written as a quoted
//    string inside @ConfigDefault -- presumably JSON-encoded; confirm against embulk-core).
//  - total_file_count_limit: maximum number of entries Builder accepts before add()
//    starts returning false. The default is 2147483647 (Integer.MAX_VALUE).
public interface Task
{
+ @Config("path_match_pattern")
+ @ConfigDefault("\".*\"")
+ String getPathMatchPattern();
+
@Config("total_file_count_limit")
@ConfigDefault("2147483647")
int getTotalFileCountLimit();
}
@@ -61,21 +67,25 @@
private final OutputStream stream;
private final List<Entry> entries = new ArrayList<>();
private String last = null;
// Upper bound on the number of entries; add() rejects paths once size() reaches it.
private int limitCount = Integer.MAX_VALUE;
+ // Regex a path must fully match for add() to accept it. Initialized to ".*", the same
+ // default the path_match_pattern option uses, so that a Builder created without an
+ // explicit pattern (for example through the no-argument constructor) does not throw
+ // a NullPointerException when add() calls pathMatchPattern.matcher(path).
+ private Pattern pathMatchPattern = Pattern.compile(".*");
+
private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
public Builder(Task task)
{
this();
this.limitCount = task.getTotalFileCountLimit();
+ this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
}
public Builder(ConfigSource config)
{
this();
+ this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
}
public Builder()
{
@@ -92,29 +102,37 @@
{
this.limitCount = limitCount;
return this;
}
+ // Replaces the regex that a path must fully match to be accepted by add().
+ // Returns this builder so that calls can be chained.
+ public Builder pathMatchPattern(String pattern)
+ {
+ final Pattern compiled = Pattern.compile(pattern);
+ this.pathMatchPattern = compiled;
+ return this;
+ }
+
// Number of entries collected so far.
public int size()
{
final int collected = entries.size();
return collected;
}
// True while the number of collected entries is still below limitCount.
public boolean needsMore()
{
return limitCount > size();
}
+ // returns true if this file is used
public synchronized boolean add(String path, long size)
{
// TODO throw IllegalStateException if stream is already closed
if (!needsMore()) {
return false;
}
- // TODO in the future, support some other filtering parameters (file name suffix filter, regex filter, etc)
- // and return false if filtered out.
+ if (!pathMatchPattern.matcher(path).matches()) {
+ return false;
+ }
int index = entries.size();
entries.add(new Entry(index, size));
byte[] data = path.getBytes(StandardCharsets.UTF_8);