05-20-2008 04:36 PM
/*
* Copyright (C) 2005 Jesper Steen Møller
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* As a special exception to the terms and conditions of version 2.0 of
* the GPL, you may redistribute this Program in connection with Free/Libre
* and Open Source Software ("FLOSS") applications as described in Alfresco's
* FLOSS exception. You should have recieved a copy of the text describing
* the FLOSS exception, and it is also available here:
* http://www.alfresco.com/legal/licensing"
*/
//package org.alfresco.repo.content.metadata;
package org.alfrescox.repo.content.metadata;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
/**
* Office file format Metadata Extracter. This extracter uses the POI library to extract
* the following:
* <pre>
* <b>author:</b> – cm:author
* <b>title:</b> – cm:title
* <b>subject:</b> – cm:description
* <b>createDateTime:</b> – cm:created
* <b>lastSaveDateTime:</b> – cm:modified
* <b>comments:</b>
* <b>editTime:</b>
* <b>format:</b>
* <b>keywords:</b>
* <b>lastAuthor:</b>
* <b>lastPrinted:</b>
* <b>osVersion:</b>
* <b>thumbnail:</b>
* <b>pageCount:</b>
* <b>wordCount:</b>
* </pre>
*
* @author Jesper Steen Møller
* @author Derek Hulley
*/
public class MyOfficeMetadataExtracter extends AbstractMappingMetadataExtracter
{
public static final String KEY_AUTHOR = "author";
public static final String KEY_TITLE = "title";
public static final String KEY_SUBJECT = "subject";
public static final String KEY_CREATE_DATETIME = "createDateTime";
public static final String KEY_LAST_SAVE_DATETIME = "lastSaveDateTime";
public static final String KEY_COMMENTS = "comments";
public static final String KEY_EDIT_TIME = "editTime";
public static final String KEY_FORMAT = "format";
public static final String KEY_KEYWORDS = "keywords";
public static final String KEY_LAST_AUTHOR = "lastAuthor";
public static final String KEY_LAST_PRINTED = "lastPrinted";
public static final String KEY_OS_VERSION = "osVersion";
public static final String KEY_THUMBNAIL = "thumbnail";
public static final String KEY_PAGE_COUNT = "pageCount";
public static final String KEY_WORD_COUNT = "wordCount";
public static final String KEY_CATEGORY = "category";
public static final String KEY_MANAGER = "manager";
public static final String KEY_CLIENTE = "cliente";
public static final String KEY_MATTER = "matter";
public static final String KEY_LANGUAGE = "language";
public static String[] SUPPORTED_MIMETYPES = new String[] {
MimetypeMap.MIMETYPE_WORD,
MimetypeMap.MIMETYPE_EXCEL,
MimetypeMap.MIMETYPE_PPT};
public MyOfficeMetadataExtracter()
{
super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
}
@Override
protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
{
final Map<String, Serializable> rawProperties = newRawMap();
POIFSReaderListener readerListener = new POIFSReaderListener()
{
public void processPOIFSReaderEvent(final POIFSReaderEvent event)
{
try
{
PropertySet ps = PropertySetFactory.create(event.getStream());
if (ps instanceof SummaryInformation)
{
SummaryInformation si = (SummaryInformation) ps;
putRawValue(KEY_AUTHOR, si.getAuthor(), rawProperties);
putRawValue(KEY_TITLE, si.getTitle(), rawProperties);
putRawValue(KEY_SUBJECT, si.getSubject(), rawProperties);
putRawValue(KEY_CREATE_DATETIME, si.getCreateDateTime(), rawProperties);
putRawValue(KEY_LAST_SAVE_DATETIME, si.getLastSaveDateTime(), rawProperties);
putRawValue(KEY_COMMENTS, si.getComments(), rawProperties);
putRawValue(KEY_EDIT_TIME, si.getEditTime(), rawProperties);
putRawValue(KEY_FORMAT, si.getFormat(), rawProperties);
putRawValue(KEY_KEYWORDS, si.getKeywords(), rawProperties);
putRawValue(KEY_LAST_AUTHOR, si.getLastAuthor(), rawProperties);
putRawValue(KEY_LAST_PRINTED, si.getLastPrinted(), rawProperties);
putRawValue(KEY_OS_VERSION, si.getOSVersion(), rawProperties);
putRawValue(KEY_THUMBNAIL, si.getThumbnail(), rawProperties);
putRawValue(KEY_PAGE_COUNT, si.getPageCount(), rawProperties);
putRawValue(KEY_WORD_COUNT, si.getWordCount(), rawProperties);
}
}
catch (Exception ex)
{
throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), ex);
}
// here are the modifications I made…
try
{
PropertySet psd = PropertySetFactory.create(event.getStream());
if (psd instanceof DocumentSummaryInformation)
{
DocumentSummaryInformation dsi = (DocumentSummaryInformation) psd;
putRawValue(KEY_CATEGORY, dsi.getCategory(),rawProperties);
putRawValue(KEY_MANAGER, dsi.getManager(),rawProperties);
putRawValue(KEY_CLIENTE, dsi.getCustomProperties().get("Cliente"),rawProperties);
putRawValue(KEY_MATTER, dsi.getCustomProperties().get("Matter),rawProperties);
putRawValue(KEY_LANGUAGE, dsi.getCustomProperties().get("Language"),rawProperties);
}
}
catch (Exception exd)
{
throw new ContentIOException("Property set stream: " + event.getPath() + event.getName(), exd);
}
//modifications end here
}
};
InputStream is = null;
try
{
is = reader.getContentInputStream();
POIFSReader poiFSReader = new POIFSReader();
poiFSReader.registerListener(readerListener, SummaryInformation.DEFAULT_STREAM_NAME);
poiFSReader.read(is);
}
finally
{
if (is != null)
{
try { is.close(); } catch (IOException e) {}
}
}
return rawProperties;
}
}
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<!--
This sample shows how to modify the mapping properties of the new V2.1 Metadata Extractors.
In this example, in addition to the default mappings, the field 'comments' is mapped to
'cm:description'. The available source properties are described in the Javadocs of the
extracter class.
-->
<beans>
<!-- Defines the bean "extracter.Office" with the custom MyOfficeMetadataExtracter class, using "baseMetadataExtracter" as its parent bean definition. -->
<bean id="extracter.Office" class="org.alfrescox.repo.content.metadata.MyOfficeMetadataExtracter" parent="baseMetadataExtracter" >
<!-- Sets inheritDefaultMapping to true; presumably the default mappings stay active and the entries below are added to them (confirm against the extracter base class). -->
<property name="inheritDefaultMapping">
<value>true</value>
</property>
<!-- Each prop key is a raw property key of the extracter class (a KEY_* constant); each value is the prefixed target property. -->
<property name="mappingProperties">
<props>
<!-- Keys of the form namespace.prefix.X declare the namespace URI for prefix X used in the values below. -->
<prop key="namespace.prefix.cm">http://www.alfresco.org/model/content/1.0</prop>
<prop key="comments">cm:description</prop>
<!-- Prefix "ne" is bound to "nye.model"; presumably this must match the namespace URI of the custom model (confirm against the model definition). -->
<prop key="namespace.prefix.ne">nye.model</prop>
<prop key="cliente">ne:Cliente</prop>
<prop key="matter">ne:Matter</prop>
<prop key="subject">ne:Practica</prop>
<prop key="category">ne:Documento</prop>
<prop key="keywords">ne:Keywords</prop>
<prop key="manager">ne:SocioR</prop>
<prop key="language">ne:Idioma</prop>
</props>
</property>
</bean>
</beans>
08-14-2008 05:03 PM
08-22-2008 12:19 PM
// Register the same listener under both default stream names: the summary
// information stream and the document summary information stream.
poiFSReader.registerListener(readerListener, SummaryInformation.DEFAULT_STREAM_NAME);
poiFSReader.registerListener(readerListener, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
09-04-2008 04:36 PM
/*
* Copyright (C) 2005 Jesper Steen Møller
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* As a special exception to the terms and conditions of version 2.0 of
* the GPL, you may redistribute this Program in connection with Free/Libre
* and Open Source Software ("FLOSS") applications as described in Alfresco's
* FLOSS exception. You should have recieved a copy of the text describing
* the FLOSS exception, and it is also available here:
* http://www.alfresco.com/legal/licensing"
*/
//package org.alfresco.repo.content.metadata;
package com.mycompany.extract;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
/**
 * Office file format Metadata Extracter. This extracter uses the POI library to extract
 * the following raw properties from the summary information stream:
 * <pre>
 *   <b>author:</b>             --   cm:author
 *   <b>title:</b>              --   cm:title
 *   <b>subject:</b>            --   cm:description
 *   <b>createDateTime:</b>     --   cm:created
 *   <b>lastSaveDateTime:</b>   --   cm:modified
 *   <b>comments:</b>
 *   <b>editTime:</b>
 *   <b>format:</b>
 *   <b>keywords:</b>
 *   <b>lastAuthor:</b>
 *   <b>lastPrinted:</b>
 *   <b>osVersion:</b>
 *   <b>thumbnail:</b>
 *   <b>pageCount:</b>
 *   <b>wordCount:</b>
 * </pre>
 * and the following raw properties from the custom properties of the document
 * summary information stream:
 * <pre>
 *   <b>Custom1:</b>            (custom document property "Custom1")
 *   <b>Custom2:</b>            (custom document property "Custom2")
 *   <b>foo:</b>                (custom document property "Foo")
 * </pre>
 *
 * @author Jesper Steen Møller
 * @author Derek Hulley
 */
public class MyOfficeMetadataExtracter extends AbstractMappingMetadataExtracter
{
    // Raw property keys read from the SummaryInformation property set
    public static final String KEY_AUTHOR = "author";
    public static final String KEY_TITLE = "title";
    public static final String KEY_SUBJECT = "subject";
    public static final String KEY_CREATE_DATETIME = "createDateTime";
    public static final String KEY_LAST_SAVE_DATETIME = "lastSaveDateTime";
    public static final String KEY_COMMENTS = "comments";
    public static final String KEY_EDIT_TIME = "editTime";
    public static final String KEY_FORMAT = "format";
    public static final String KEY_KEYWORDS = "keywords";
    public static final String KEY_LAST_AUTHOR = "lastAuthor";
    public static final String KEY_LAST_PRINTED = "lastPrinted";
    public static final String KEY_OS_VERSION = "osVersion";
    public static final String KEY_THUMBNAIL = "thumbnail";
    public static final String KEY_PAGE_COUNT = "pageCount";
    public static final String KEY_WORD_COUNT = "wordCount";

    // Raw property keys read from the custom properties of the
    // DocumentSummaryInformation property set. Note that KEY_FOO is lower case
    // while the custom document property it is read from is named "Foo".
    public static final String KEY_CUSTOM1 = "Custom1";
    public static final String KEY_CUSTOM2 = "Custom2";
    public static final String KEY_FOO = "foo";

    /** Mimetypes passed to the superclass constructor as the supported set. */
    public static String[] SUPPORTED_MIMETYPES = new String[] {
        MimetypeMap.MIMETYPE_WORD,
        MimetypeMap.MIMETYPE_EXCEL,
        MimetypeMap.MIMETYPE_PPT};

    public MyOfficeMetadataExtracter()
    {
        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
    }

    /**
     * Reads the summary and document summary property sets of the document and
     * collects their values into a raw property map.
     *
     * @param reader    the reader providing the document content
     * @return          the raw properties keyed by the <code>KEY_*</code> constants of this class
     * @throws Throwable if the content cannot be read; a property set that cannot be
     *                  processed is reported by the listeners as a {@link ContentIOException}
     */
    @Override
    protected Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable
    {
        final Map<String, Serializable> rawProperties = newRawMap();

        // Listener for the SummaryInformation stream
        POIFSReaderListener summaryListener = new POIFSReaderListener()
        {
            public void processPOIFSReaderEvent(final POIFSReaderEvent event)
            {
                try
                {
                    PropertySet ps = PropertySetFactory.create(event.getStream());
                    if (ps instanceof SummaryInformation)
                    {
                        putSummaryValues((SummaryInformation) ps, rawProperties);
                    }
                }
                catch (Exception ex)
                {
                    throw new ContentIOException("Property set stream SummaryInformation: " + event.getPath() + event.getName(), ex);
                }
            }
        };

        // Listener for the DocumentSummaryInformation stream
        POIFSReaderListener documentSummaryListener = new POIFSReaderListener()
        {
            public void processPOIFSReaderEvent(final POIFSReaderEvent event)
            {
                try
                {
                    PropertySet ps = PropertySetFactory.create(event.getStream());
                    if (ps instanceof DocumentSummaryInformation)
                    {
                        putCustomValues((DocumentSummaryInformation) ps, rawProperties);
                    }
                }
                catch (Exception ex)
                {
                    throw new ContentIOException("Property set stream DocumentSummaryInformation: " + event.getPath() + event.getName(), ex);
                }
            }
        };

        InputStream is = null;
        try
        {
            is = reader.getContentInputStream();
            POIFSReader poiFSReader = new POIFSReader();
            poiFSReader.registerListener(summaryListener, SummaryInformation.DEFAULT_STREAM_NAME);
            poiFSReader.registerListener(documentSummaryListener, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
            poiFSReader.read(is);
        }
        finally
        {
            if (is != null)
            {
                try
                {
                    is.close();
                }
                catch (IOException ignored)
                {
                    // Best effort: a failure to close must not mask the extraction result.
                }
            }
        }
        return rawProperties;
    }

    /** Copies the values of the summary information property set into the raw map. */
    private void putSummaryValues(SummaryInformation si, Map<String, Serializable> rawProperties)
    {
        putRawValue(KEY_AUTHOR, si.getAuthor(), rawProperties);
        putRawValue(KEY_TITLE, si.getTitle(), rawProperties);
        putRawValue(KEY_SUBJECT, si.getSubject(), rawProperties);
        putRawValue(KEY_CREATE_DATETIME, si.getCreateDateTime(), rawProperties);
        putRawValue(KEY_LAST_SAVE_DATETIME, si.getLastSaveDateTime(), rawProperties);
        putRawValue(KEY_COMMENTS, si.getComments(), rawProperties);
        putRawValue(KEY_EDIT_TIME, si.getEditTime(), rawProperties);
        putRawValue(KEY_FORMAT, si.getFormat(), rawProperties);
        putRawValue(KEY_KEYWORDS, si.getKeywords(), rawProperties);
        putRawValue(KEY_LAST_AUTHOR, si.getLastAuthor(), rawProperties);
        putRawValue(KEY_LAST_PRINTED, si.getLastPrinted(), rawProperties);
        putRawValue(KEY_OS_VERSION, si.getOSVersion(), rawProperties);
        putRawValue(KEY_THUMBNAIL, si.getThumbnail(), rawProperties);
        putRawValue(KEY_PAGE_COUNT, si.getPageCount(), rawProperties);
        putRawValue(KEY_WORD_COUNT, si.getWordCount(), rawProperties);
    }

    /** Copies the custom document properties, when the document has any, into the raw map. */
    private void putCustomValues(DocumentSummaryInformation dsi, Map<String, Serializable> rawProperties)
    {
        // getCustomProperties() yields null for documents without a custom
        // property section (see the POI HPSF documentation). Without this guard
        // such documents fail extraction with a wrapped NullPointerException.
        if (dsi.getCustomProperties() == null)
        {
            return;
        }
        putRawValue(KEY_CUSTOM1, (Serializable) dsi.getCustomProperties().get("Custom1"), rawProperties);
        putRawValue(KEY_CUSTOM2, (Serializable) dsi.getCustomProperties().get("Custom2"), rawProperties);
        putRawValue(KEY_FOO, (Serializable) dsi.getCustomProperties().get("Foo"), rawProperties);
    }
}
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<!--
This sample shows how to modify the mapping properties of the new V2.1 Metadata Extractors.
In this example, in addition to the default mappings, custom fields such as 'Custom1' are
mapped to properties of the 'mymodel' namespace. The available source properties are
described in the Javadocs of the extracter class.
-->
<beans>
    <!-- Defines the bean "extracter.Office" with the custom MyOfficeMetadataExtracter class. -->
    <bean id="extracter.Office" class="com.mycompany.extract.MyOfficeMetadataExtracter" parent="baseMetadataExtracter" >
        <property name="inheritDefaultMapping">
            <value>true</value>
        </property>
        <!-- Each prop key must equal a raw property key emitted by the extracter class
             (the value of one of its KEY_* constants); each value is the prefixed target property. -->
        <property name="mappingProperties">
            <props>
                <prop key="namespace.prefix.cm">http://www.alfresco.org/model/content/1.0</prop>
                <!-- defined in myCustomModel.xml as namespace uri -->
                <prop key="namespace.prefix.mymodel">com.mycompany.model</prop>
                <prop key="author">cm:author</prop>
                <prop key="title">cm:title</prop>
                <prop key="subject">cm:description</prop>
                <prop key="createDateTime">cm:created</prop>
                <prop key="lastSaveDateTime">cm:modified</prop>
                <prop key="Custom1">mymodel:Custom1</prop>
                <prop key="Custom2">mymodel:Custom2</prop>
                <!-- The raw key is lower-case "foo" (KEY_FOO in the extracter), not the document property name "Foo". -->
                <prop key="foo">mymodel:Foo</prop>
            </props>
        </property>
    </bean>
</beans>
Tags
Find what you came for
We want to make your experience in Hyland Connect as valuable as possible, so we put together some helpful links.