2010年11月1日月曜日

GrailsとTika

アップロードしたファイルのテキストを抽出したいのでApache Tikaを使う。

grails-app/conf/BuildConfig.groovy

grails.project.dependency.resolution = {
    dependencies {
        // Tika is imported by application code (the upload controller), so it is
        // needed when compiling and when running the application. The 'build'
        // scope is for build-system-only dependencies; use 'compile' so the jar
        // is also on the application classpath and in the packaged WAR.
        compile 'org.apache.tika:tika-parsers:0.7'
    }
}


grails-app/controllers/UploadFileController.groovy

import org.springframework.web.multipart.MultipartFile
import org.apache.tika.metadata.Metadata
import org.apache.tika.parser.AutoDetectParser
import org.apache.tika.sax.BodyContentHandler

class UploadFileController {

:

/**
 * Stores an uploaded file and the plain text Tika extracts from it.
 *
 * Reads the multipart upload from params.file. When a file with a name is
 * present, its name, size, content type and raw bytes are copied onto a new
 * UploadFile, and Tika is asked to extract the body text. A failed extraction
 * is only logged; the instance is still saved, just without text.
 *
 * On a successful save the action redirects to "show"; otherwise it
 * re-renders the "create" view with the unsaved instance.
 */
def save = {
    def uploadFileInstance = new UploadFile()
    def file = params.file
    // Skip population when nothing was uploaded (no multipart part or no file name).
    if (file instanceof MultipartFile && file.originalFilename) {
        uploadFileInstance.name = file.originalFilename
        uploadFileInstance.size = file.size
        uploadFileInstance.contentType = file.contentType
        uploadFileInstance.bytes = file.bytes
        def stream
        try {
            stream = file.inputStream
            def parser = new AutoDetectParser()
            def writer = new StringWriter()
            def handler = new BodyContentHandler(writer)
            def metadata = new Metadata()
            // Give the detector the file name as a hint; formats without
            // distinctive magic bytes are otherwise hard to identify.
            metadata.set(Metadata.RESOURCE_NAME_KEY, file.originalFilename)
            parser.parse(stream, handler, metadata)
            uploadFileInstance.text = writer.toString()
        }
        catch (Exception e) {
            // Text extraction is best-effort: keep the upload even if parsing fails.
            log.warn(e.message, e)
        }
        finally {
            stream?.close()
        }
    }
    if (uploadFileInstance.save(flush: true)) {
        flash.message = "${message(code: 'default.created.message', args: [message(code: 'uploadFile.label', default: 'UploadFile'), uploadFileInstance.id])}"
        redirect(action: "show", id: uploadFileInstance.id)
    }
    else {
        render(view: "create", model: [uploadFileInstance: uploadFileInstance])
    }
}

:
}