forked from clemsonciti/workshop-python-intro-to-hadoop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmovieAnalyzer.pbs
30 lines (24 loc) · 862 Bytes
/
movieAnalyzer.pbs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/bin/bash
#PBS -N movieAnalyzer
#PBS -l select=1:ncpus=8:mem=8gb
#PBS -l walltime=00:15:00
#PBS -j oe
#
# movieAnalyzer.pbs
#
# PBS batch job that submits a Hadoop streaming job (avgRatingMapper02.py /
# avgRatingReducer02.py over the MovieLens ratings.csv) to the Cypress
# cluster and copies the first reducer output file back into the directory
# the job was submitted from.
#
# Requirements (relative to the submission directory):
#   ./codes/avgRatingMapper02.py
#   ./codes/avgRatingReducer02.py
#   ./movielens/movies.csv
#
# Exit status: non-zero if the submission directory cannot be entered, if
# the streaming job fails, or if the result cannot be downloaded.

# HDFS directory (relative to the user's HDFS home) that receives the job
# output. Kept in one place so the cleanup, the job, and the download agree.
readonly output_dir='intro-to-hadoop/output-movielens-03'
# Name of the reducer output file that is copied back after the job.
readonly result_file='part-00000'

# Load the hdp module and initialize Kerberos tokens; klist prints the
# resulting ticket cache into the job log for troubleshooting.
module load hdp/0.1
cypress-kinit
klist

# cd into the directory containing the PBS script. The streaming job below
# uses paths relative to it, so abort rather than run from the wrong place
# (an unset variable would otherwise make a bare `cd` go to $HOME).
cd "${PBS_O_WORKDIR:?PBS_O_WORKDIR is not set; submit this script with qsub}" \
  || exit 1

# Remove the output directory left by a previous run; Hadoop refuses to
# write into an existing output directory. -f keeps this step quiet and
# successful when the directory does not exist yet (first run).
hdfs dfs -rm -r -f "${output_dir}"

# Submit the Hadoop job to Cypress. The -file options ship the mapper, the
# reducer, and movies.csv to the working directory of every task.
# NOTE(review): newer Hadoop streaming releases report -file as deprecated
# in favour of the generic -files option; confirm against the cluster's
# Hadoop version before switching.
yarn jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-streaming.jar \
  -input /repository/movielens/ratings.csv \
  -output "${output_dir}" \
  -file ./codes/avgRatingMapper02.py \
  -mapper avgRatingMapper02.py \
  -file ./codes/avgRatingReducer02.py \
  -reducer avgRatingReducer02.py \
  -file ./movielens/movies.csv
job_status=$?
if [[ "${job_status}" -ne 0 ]]; then
  printf 'movieAnalyzer: streaming job failed (exit status %s)\n' \
    "${job_status}" >&2
  exit "${job_status}"
fi

# Export output data back to Palmetto for further analysis. A copy left by
# an earlier run is removed first: `hdfs dfs -get` refuses to overwrite an
# existing local file, which would leave the stale result in place.
rm -f -- "./${result_file}"
if ! hdfs dfs -get "${output_dir}/${result_file}" .; then
  printf 'movieAnalyzer: could not download %s/%s\n' \
    "${output_dir}" "${result_file}" >&2
  exit 1
fi