Sha256: 474da0c34ca0e03a052e67ae7e8f246a2f2d86c3644b45813c61144567a03a0a

Contents?: true

Size: 1.05 KB

Versions: 5

Compression:

Stored size: 1.05 KB

Contents

package org.embulk.input.hdfs;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Splits one HDFS file into up to {@code numPartitions} contiguous byte ranges
 * of near-equal length, each described by an {@link HdfsPartialFile}.
 *
 * Created by takahiro.nakayama on 8/20/15.
 */
public class HdfsFilePartitioner
{
    private final FileSystem fs;
    private final Path path;
    private final int numPartitions;

    /**
     * @param fs            file system used to look up the length of {@code path}
     * @param path          file to split
     * @param numPartitions requested number of ranges; when it is zero or negative
     *                      {@link #getHdfsPartialFiles()} returns an empty list
     */
    public HdfsFilePartitioner(FileSystem fs, Path path, int numPartitions)
    {
        this.fs = fs;
        this.path = path;
        this.numPartitions = numPartitions;
    }

    /**
     * Builds the byte ranges for the file.
     *
     * <p>Range {@code i} runs from {@code floor(size * i / numPartitions)} to
     * {@code floor(size * (i + 1) / numPartitions)}, so the end offset of one range
     * is the start offset of the next and the last range ends at the file length.
     * Ranges that would be empty (start == end) are skipped, so the returned list
     * can hold fewer than {@code numPartitions} entries; it is empty for a
     * zero-length file.
     *
     * @return the non-empty ranges in ascending offset order
     * @throws IOException if the file status cannot be read from the file system
     */
    public List<HdfsPartialFile> getHdfsPartialFiles() throws IOException
    {
        List<HdfsPartialFile> hdfsPartialFiles = new ArrayList<>();
        long size = fs.getFileStatus(path).getLen();
        for (int i = 0; i < numPartitions; i++) {
            long start = boundary(size, i);
            long end = boundary(size, i + 1);
            if (start < end) {
                hdfsPartialFiles.add(new HdfsPartialFile(path.toString(), start, end));
            }
        }
        return hdfsPartialFiles;
    }

    /**
     * Returns {@code floor(size * index / numPartitions)} without forming the
     * product {@code size * index}, which can silently wrap around for very large
     * files combined with many partitions.
     *
     * <p>With {@code size = q * n + r}, the value equals {@code q * index + r * index / n}.
     * {@code q * index} never exceeds {@code size}, and {@code r * index} is a product
     * of two values below 2^31, so neither term can overflow a {@code long}.
     * Only called with {@code numPartitions > 0} and {@code 0 <= index <= numPartitions}.
     */
    private long boundary(long size, int index)
    {
        long quotient = size / numPartitions;
        long remainder = size % numPartitions;
        return quotient * index + remainder * index / numPartitions;
    }
}

Version data entries

5 entries across 5 versions & 1 rubygem

Version Path
embulk-input-hdfs-0.1.7 src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java
embulk-input-hdfs-0.1.6 src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java
embulk-input-hdfs-0.1.5 src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java
embulk-input-hdfs-0.1.4 src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java
embulk-input-hdfs-0.1.1 src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java