Monday, June 24, 2013

Hadoop - Searching HDFS files for lines that contain given keywords

/**
 * Searches every file directly under an HDFS directory and collects the lines
 * that contain all of the given key words.
 *
 * <p>Key words are matched as literal substrings (not regular expressions), so
 * characters such as {@code [}, {@code (} or {@code *} in a key word are safe.
 * Sub-directories of {@code path} are skipped; the search is not recursive.
 * Matching lines are appended to the result back-to-back, without any
 * separator between them.
 *
 * @param path the HDFS directory to search, for example {@code /home/hadoop/data/flume}
 * @param searchWord the key words separated by single spaces, for example
 *        {@code test1 test2}; a line matches only if it contains every key word
 * @return the concatenation of all matching lines, or an empty string if no line matches
 * @throws Exception if the file system cannot be reached, or a file cannot be
 *         listed, opened or read
 */
public String readLogsPlural(String path, String searchWord) throws Exception {
    String[] multiWord = searchWord.split(" ");
    StringBuilder sb = new StringBuilder();

    Configuration conf = new Configuration();
    conf.set("fs.default.name", "hdfs://172.xx.xxx.xxx:9000");
    // NOTE(review): FileSystem.get() may hand back a shared, cached instance,
    // so it is deliberately not closed here - confirm against the Hadoop version in use.
    FileSystem dfs = FileSystem.get(conf);

    // You need to pass in your hdfs path
    FileStatus[] status = dfs.listStatus(new Path(path));
    if (status == null) {
        // Presumably some Hadoop versions return null for a missing path; treat as "nothing to search".
        return sb.toString();
    }

    for (int i = 0; i < status.length; i++) {
        if (status[i].isDir()) {
            // A directory cannot be opened as a stream; only plain files are searched.
            continue;
        }
        FSDataInputStream fsIn = dfs.open(status[i].getPath());
        try {
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            BufferedReader br = new BufferedReader(new InputStreamReader(fsIn, "UTF-8"));
            String line;
            while ((line = br.readLine()) != null) {
                if (containsAllWords(line, multiWord)) {
                    sb.append(line);
                }
            }
        } finally {
            // Always release the HDFS stream, even when reading fails part-way.
            fsIn.close();
        }
    }
    return sb.toString();
}

/** Returns true when {@code line} contains every entry of {@code words} as a literal substring. */
private static boolean containsAllWords(String line, String[] words) {
    for (int n = 0; n < words.length; n++) {
        if (!line.contains(words[n])) {
            return false;
        }
    }
    return true;
}

No comments:

Post a Comment