Sunday, August 30, 2015

HDFS FileSystem Java API in Maven project

1. Create a Java project

2. Convert it to a Maven project
In the pom.xml file, add the Hadoop dependencies:
        <!-- Hadoop client libraries; supplies the org.apache.hadoop.* classes
             (Configuration, FileSystem, Path) imported by the example class. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0</version>
        </dependency>
        <!-- NOTE(review): not referenced directly by the example code; presumably
             needed by Hadoop at runtime - confirm whether hadoop-client already
             brings it in transitively. -->
        <dependency>
            <groupId>commons-configuration</groupId>
            <artifactId>commons-configuration</artifactId>
            <version>1.10</version>
        </dependency>
        <!-- NOTE(review): not referenced directly by the example code; presumably
             needed by Hadoop at runtime - confirm whether hadoop-client already
             brings it in transitively. -->
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.6</version>
        </dependency>
        <!-- Commons Logging API; supplies org.apache.commons.logging.Log and
             LogFactory used by the example class for error reporting. -->
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging-api</artifactId>
            <version>1.1</version>
        </dependency>

3. Create a new package and a new class with a main() method
Example: package name: hdfs.operations, class name: Operations

4. Add your code to the class

package hdfs.operations;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class Operations {

    public static void main(String[] args) {
        FileSystem hdfs;
        try {
            hdfs = FileSystem.get(new Configuration());
            Path homeDir = hdfs.getHomeDirectory();

            // Print the home directory
            System.out.println("Home folder -" + homeDir);

        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
    }

    private static final Log LOGGER = LogFactory.getLog(Operations.class);
}


5. Configure Maven build and move target jar file to destination folder
In the build -> plugins section of pom.xml, add the following plugin and specify the output folder for the generated jar file:

        <!-- Configures maven-jar-plugin to write the packaged jar into
             /data/hadoop/jars, the location used by the "hadoop jar" command. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <configuration>
                <outputDirectory>/data/hadoop/jars</outputDirectory>
            </configuration>
            <version>2.6</version>
        </plugin>

6. Run with Hadoop
hadoop jar /data/hadoop/jars/jar-name.jar  hdfs.operations.Operations

No comments:

Post a Comment