1) Download latest Flume binaries from https://flume.apache.org/download.html
$cd $HOME/Downloads
$tar -xvzf apache-flume-1.4.0-bin.tar.gz
$ls -lrt apache-flume-1.4.0-bin
2) Copy binaries into local folder
$sudo mkdir /usr/local/flume
$sudo cp -r apache-flume-1.4.0-bin/* /usr/local/flume
$cd /usr/local
$sudo chown -R butik flume
(Note: the copy command was truncated in the original; the line above is the reconstructed step. Replace "butik" with your own username.)
3) Set Flume home and path
$cd $HOME
$vi .bashrc
Add the following lines (adjust the path if you installed elsewhere):
export FLUME_PREFIX=/usr/local/flume
export PATH=$PATH:$FLUME_PREFIX/bin
$exec bash #commit the changes
4) Say hello world to Flume
$cd $FLUME_PREFIX/conf
$vi hw.conf
agent.sources=s1
agent.channels=c1
agent.sinks=k1
agent.sources.s1.type=netcat
agent.sources.s1.channels=c1
agent.sources.s1.bind=0.0.0.0
agent.sources.s1.port=12345
agent.channels.c1.type=memory
agent.sinks.k1.type=logger
agent.sinks.k1.channel=c1
Start the flume agent
$./bin/flume-ng agent -n agent -c conf -f conf/hw.conf -Dflume.root.logger=INFO,console
From another terminal, send some lines of text and check the agent logs.
$telnet localhost 12345
5) Now let's collect web server logs through Flume
Install apache
$sudo apt-get update
$sudo apt-get install apache2
$sudo vi /var/www/html/index.html
<!DOCTYPE html>
<html>
<body>
<h1>Welcome to the world of bigdata</h1>
<p>Let us flume web server logs into HDFS</p>
</body>
</html>
Check localhost or 127.0.0.1 for the webpage just created.
$cd $FLUME_PREFIX/conf
flume-env.sh
JAVA_HOME=/usr/lib/jvm/java-1.7.0-openjdk-amd64
HADOOP_PREFIX=/usr/local/hadoop
# Note that the Flume conf directory is always included in the classpath.
FLUME_CLASSPATH=/usr/local/flume/lib/flume-sources-1.0-SNAPSHOT.jar
(Note: the original paste contained a second, truncated copy of these lines; they have been merged. Adjust JAVA_HOME to your installed JDK.)
flume-conf.properties
tail1.sources = src1
tail1.channels = ch1
tail1.sinks = sink1
tail1.sources.src1.type = exec
tail1.sources.src1.command = tail -F /var/log/apache2/access.log
tail1.sources.src1.channels = ch1
tail1.channels.ch1.type = memory
tail1.channels.ch1.capacity = 1000
tail1.sinks.sink1.type = avro
tail1.sinks.sink1.hostname = localhost
tail1.sinks.sink1.port = 6000
tail1.sinks.sink1.batch-size = 1
tail1.sinks.sink1.channel = ch1
##
collector1.sources = src1
collector1.channels = ch1
collector1.sinks = sink1
collector1.sources.src1.type = avro
collector1.sources.src1.bind = localhost
collector1.sources.src1.port = 6000
collector1.sources.src1.channels = ch1
collector1.channels.ch1.type = memory
collector1.channels.ch1.capacity = 1000
collector1.sinks.sink1.type = hdfs
collector1.sinks.sink1.hdfs.path = /user/flume/weblogs
collector1.sinks.sink1.hdfs.filePrefix = access_log
collector1.sinks.sink1.hdfs.fileType = DataStream
collector1.sinks.sink1.hdfs.writeFormat = Text
# NOTE(review): the four hdfs.* values above were truncated in the original
# post; these are typical settings for this tutorial -- verify the HDFS path
# against your own environment.
collector1.sinks.sink1.channel = ch1
log4j.properties
flume.root.logger=DEBUG,A1
#flume.root.logger=INFO,LOGFILE
#flume.log.dir=./logs
flume.log.dir=/usr/local/flume/logs
flume.log.file=flume.log
#log4j.logger.org.apache.flume.lifecycle = INFO
log4j.logger.org.jboss = WARN
log4j.logger.org.mortbay = INFO
log4j.logger.org.apache.avro.ipc.NettyTransceiver = WARN
log4j.logger.org.apache.hadoop = INFO
# Define the root logger to the system property "flume.root.logger".
log4j.rootLogger=${flume.root.logger}
# Stock log4j rolling file appender
# Default log rotation configuration
log4j.appender.LOGFILE=org.apache.log4j.RollingFileAppender
log4j.appender.LOGFILE.MaxFileSize=100MB
log4j.appender.LOGFILE.MaxBackupIndex=10
log4j.appender.LOGFILE.File=${flume.log.dir}/${flume.log.file}
log4j.appender.LOGFILE.layout=org.apache.log4j.PatternLayout
log4j.appender.LOGFILE.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n
# Warning: If you enable the following appender it will fill up your disk if you don't have a cleanup job!
# This uses the updated rolling file appender from log4j-extras that supports a reliable time-based rolling policy.
# See http://logging.apache.org/
# Add "DAILY" to flume.root.logger above if you want to use this
log4j.appender.DAILY=org.apache.log4j.rolling.RollingFileAppender
log4j.appender.DAILY.rollingPolicy=org.apache.log4j.rolling.TimeBasedRollingPolicy
log4j.appender.DAILY.rollingPolicy.ActiveFileName=${flume.log.dir}/${flume.log.file}
log4j.appender.DAILY.rollingPolicy.FileNamePattern=${flume.log.dir}/${flume.log.file}.%d{yyyy-MM-dd}
log4j.appender.DAILY.layout=org.apache.log4j.PatternLayout
log4j.appender.DAILY.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n
# Add "console" to flume.root.logger above if you want to use this
log4j.appender.A1=org.apache.log4j.ConsoleAppender
log4j.appender.A1.target=System.err
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%d (%t) [%p - %l] %m%n
Start the two agents. Start the collector first so its avro source is listening on port 6000 before the tail agent connects:
$cd $FLUME_PREFIX/bin
$./flume-ng agent --conf /usr/local/flume/conf/ --conf-file /usr/local/flume/conf/flume-conf.properties --name collector1
From another terminal:
$./flume-ng agent --conf /usr/local/flume/conf/ --conf-file /usr/local/flume/conf/flume-conf.properties --name tail1
(Note: the original command was cut off mid-filename and did not show the --name flag; the agent name must match the prefix used in flume-conf.properties.)
keep refreshing your webpage
We are done. Check the data in HDFS
(Reader comment) I have created /var/www/html/index.html as stated above. I have started the server, but I am not able to open it in a browser or via telnet.
Please guide me: how do I refresh the index.html file so that access logs are generated?