07-21-2011 07:10 AM
http://wiki.alfresco.com/wiki/Metadata_Extraction
07-22-2011 05:55 AM
07-22-2011 06:28 AM
07-22-2011 08:16 AM
07-22-2011 08:30 AM
07-25-2011 07:03 AM
package com.mpb.extracter;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Map;
import java.util.Set;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter;
import org.alfresco.service.namespace.QName;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.odf.OpenDocumentParser;
/**
 * Metadata extracter for OpenDocument files that additionally copies
 * "custom:"-prefixed metadata entries into the extracted properties.
 *
 * <p>For every key present in the extracter's mapping, the value stored in the
 * Tika {@link Metadata} under {@code "custom:" + key} (if any) is written to the
 * raw property map under the un-prefixed key.
 */
public class EnhancedOpenOffice extends TikaPoweredMetadataExtracter
{
    /**
     * Prefix prepended to a mapping key when looking the value up in the metadata.
     * NOTE(review): presumably the prefix the Tika parser uses for user-defined
     * document properties - confirm against the Tika version in use.
     */
    private static final String CUSTOM_PREFIX = "custom:";

    /** Mimetypes this extracter is registered for, built together with an {@link OpenDocumentParser}. */
    public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
        new String[] {
            MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_TEMPLATE,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_GRAPHICS_TEMPLATE,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_PRESENTATION_TEMPLATE,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_SPREADSHEET_TEMPLATE,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_CHART,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_CHART_TEMPLATE,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_IMAGE,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_IMAGE_TEMPLATE,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_FORMULA_TEMPLATE,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_MASTER,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_TEXT_WEB,
            MimetypeMap.MIMETYPE_OPENDOCUMENT_DATABASE
        }, new OpenDocumentParser()
    );

    /**
     * Timestamp pattern accepted by {@link #getDateOrNull(String)}.
     * Uses {@code HH} (hour of day, 0-23); the 12-hour {@code hh} would turn
     * "12:30:00" into half past midnight.
     */
    private static final String DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";

    /** Creates the extracter for {@link #SUPPORTED_MIMETYPES}. */
    public EnhancedOpenOffice()
    {
        super(SUPPORTED_MIMETYPES);
    }

    /**
     * @return a new {@link OpenDocumentParser} on every call
     */
    @Override
    protected Parser getParser()
    {
        return new OpenDocumentParser();
    }

    /**
     * Copies "custom:"-prefixed metadata values into {@code properties}.
     *
     * @param metadata   metadata produced by the parser
     * @param properties raw property map to add to; also the return value
     * @param headers    not used by this implementation
     * @return the same {@code properties} map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String, String> headers)
    {
        // Handle user-defined properties dynamically: every mapped key is probed
        // under the custom prefix, so new mappings need no code change here.
        Map<String, Set<QName>> mapping = super.getMapping();
        for (String key : mapping.keySet())
        {
            String customValue = metadata.get(CUSTOM_PREFIX + key);
            if (customValue != null)
            {
                putRawValue(key, customValue, properties);
            }
        }
        return properties;
    }

    /**
     * Parses a timestamp in {@link #DATE_PATTERN} form.
     * Not referenced elsewhere in this class.
     *
     * @param dateString text to parse; may be {@code null} or empty
     * @return the parsed date, or {@code null} when the input is {@code null},
     *         empty or cannot be parsed
     */
    private Date getDateOrNull(String dateString)
    {
        if (dateString == null || dateString.length() == 0)
        {
            return null;
        }
        try
        {
            // SimpleDateFormat is not thread-safe, so a fresh instance is used per
            // call instead of a shared static one.
            return new SimpleDateFormat(DATE_PATTERN).parse(dateString);
        }
        catch (ParseException ignored)
        {
            // Deliberate best effort: an unparseable value is reported as "no date".
            return null;
        }
    }
}
package com.mpb.extracter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Map;
import org.alfresco.repo.content.MimetypeMap;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Set;
import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter;
import org.alfresco.service.namespace.QName;
/**
 * Metadata extracter for Microsoft Office binary files (Word, Excel, PowerPoint)
 * that additionally copies "custom:"-prefixed metadata entries into the
 * extracted properties.
 *
 * <p>For every key present in the extracter's mapping, the value stored in the
 * Tika {@link Metadata} under {@code "custom:" + key} (if any) is written to the
 * raw property map under the un-prefixed key.
 */
public class EnhancedMicrosoft extends TikaPoweredMetadataExtracter
{
    /**
     * Prefix prepended to a mapping key when looking the value up in the metadata.
     * NOTE(review): presumably the prefix the Tika parser uses for user-defined
     * document properties - confirm against the Tika version in use.
     */
    private static final String CUSTOM_PREFIX = "custom:";

    /** Mimetypes this extracter is registered for, built together with an {@link OfficeParser}. */
    public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
        new String[] {
            MimetypeMap.MIMETYPE_WORD,
            MimetypeMap.MIMETYPE_EXCEL,
            MimetypeMap.MIMETYPE_PPT},
        new OfficeParser()
    );

    static
    {
        // Outlook has its own extracter, so make sure this one never claims
        // the Outlook message mimetype.
        SUPPORTED_MIMETYPES.remove(MimetypeMap.MIMETYPE_OUTLOOK_MSG);
    }

    /**
     * Timestamp pattern accepted by {@link #getDateOrNull(String)}.
     * Uses {@code HH} (hour of day, 0-23); the 12-hour {@code hh} would turn
     * "12:30:00" into half past midnight.
     */
    private static final String DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";

    /** Creates the extracter for {@link #SUPPORTED_MIMETYPES}. */
    public EnhancedMicrosoft()
    {
        super(SUPPORTED_MIMETYPES);
    }

    /**
     * @return a new {@link OfficeParser} on every call
     */
    @Override
    protected Parser getParser()
    {
        return new OfficeParser();
    }

    /**
     * Copies "custom:"-prefixed metadata values into {@code properties}.
     *
     * @param metadata   metadata produced by the parser
     * @param properties raw property map to add to; also the return value
     * @param headers    not used by this implementation
     * @return the same {@code properties} map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String, String> headers)
    {
        // Handle user-defined properties dynamically: every mapped key is probed
        // under the custom prefix, so new mappings need no code change here.
        Map<String, Set<QName>> mapping = super.getMapping();
        for (String key : mapping.keySet())
        {
            String customValue = metadata.get(CUSTOM_PREFIX + key);
            if (customValue != null)
            {
                putRawValue(key, customValue, properties);
            }
        }
        return properties;
    }

    /**
     * Parses a timestamp in {@link #DATE_PATTERN} form.
     * Not referenced elsewhere in this class.
     *
     * @param dateString text to parse; may be {@code null} or empty
     * @return the parsed date, or {@code null} when the input is {@code null},
     *         empty or cannot be parsed
     */
    private Date getDateOrNull(String dateString)
    {
        if (dateString == null || dateString.length() == 0)
        {
            return null;
        }
        try
        {
            // SimpleDateFormat is not thread-safe, so a fresh instance is used per
            // call instead of a shared static one.
            return new SimpleDateFormat(DATE_PATTERN).parse(dateString);
        }
        catch (ParseException ignored)
        {
            // Deliberate best effort: an unparseable value is reported as "no date".
            return null;
        }
    }
}
#
# OpenDocumentMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.custom=custom.model
# Mappings
creationDate=cm:created
creator=cm:author
date=
description=
generator=
initialCreator=
keyword=
language=
printDate=
printedBy=
subject=cm:description
title=cm:title
# Custom mapping added for the user-defined 'user1' property
user1=custom:user1
#
# OfficeMetadataExtracter - default mapping
#
# author: Derek Hulley
# Namespaces
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.custom=custom.model
# Mappings
author=cm:author
title=cm:title
subject=cm:description
createDateTime=cm:created
lastSaveDateTime=cm:modified
# Custom mapping added for the user-defined 'user1' property
user1=custom:user1
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<!--
This sample shows how to modify the mapping properties of the new V2.1 Metadata Extractors.
In this example, in addition to the default mappings, the field 'user1' is mapped to
'custom:user1'. The available source properties are described in the Javadocs of the
extracter class.
-->
<beans>
<!-- This adds in the extra mapping for the Open Document extractor -->
<bean id="extracter.OpenDocument" class="com.mpb.extracter.EnhancedOpenOffice" parent="baseMetadataExtracter" >
<property name="inheritDefaultMapping">
<value>true</value>
</property>
<property name="mappingProperties">
<props>
<prop key="namespace.prefix.custom">custom.model</prop>
<prop key="user1">custom:user1</prop>
</props>
</property>
</bean>
<!-- This adds in the extra mapping for the Office extractor -->
<bean id="extracter.Office" class="com.mpb.extracter.EnhancedMicrosoft" parent="baseMetadataExtracter" >
<property name="inheritDefaultMapping">
<value>true</value>
</property>
<property name="mappingProperties">
<props>
<prop key="namespace.prefix.custom">custom.model</prop>
<prop key="user1">custom:user1</prop>
</props>
</property>
</bean>
</beans>
07-25-2011 08:08 AM
Y
putRawValue("user1", metadata.get("user1"), properties);
07-25-2011 08:39 AM
07-30-2011 07:44 AM
07-31-2011 06:16 AM
08-01-2011 12:03 PM
What happened when you ran the debugger? Is your custom property being extracted from the word document or is the problem that the value is being extracted but not mapped correctly?
Tags
Find what you came for
We want to make your experience in Hyland Connect as valuable as possible, so we put together some helpful links.