07-21-2011 07:10 AM
http://wiki.alfresco.com/wiki/Metadata_Extraction
07-21-2011 02:31 PM
package com.mpb.extracter;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Map;
import java.util.Set;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.repo.content.metadata.TikaPoweredMetadataExtracter;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.alfresco.service.namespace.QName;
/**
 * Metadata extracter for the binary Microsoft Office formats (Word, Excel and
 * PowerPoint), driven by Apache Tika's {@link OfficeParser}.
 * <p>
 * The following raw values are offered to the mapping; which repository
 * properties they end up in is decided by the extracter's mapping properties:
 * <pre>
 *   <b>creationDate:</b>    Metadata.CREATION_DATE, parsed to a Date
 *   <b>creator:</b>         Metadata.CREATOR
 *   <b>date:</b>            Metadata.DATE, parsed to a Date
 *   <b>description:</b>     Metadata.DESCRIPTION
 *   <b>generator:</b>       "generator"
 *   <b>initialCreator:</b>  "initial-creator"
 *   <b>keyword:</b>         Metadata.KEYWORDS
 *   <b>language:</b>        Metadata.LANGUAGE
 * </pre>
 * In addition, for every key <i>k</i> in the configured mapping, the Tika value
 * stored under <code>custom:k</code> (when present) is offered as raw value
 * <i>k</i>. This is how user-defined document properties are picked up.
 *
 * Uses Apache Tika
 *
 * @author Derek Hulley
 * @author Nick Burch
 */
public class EnhancedOfficeExtracter extends TikaPoweredMetadataExtracter
{
    private static final String KEY_CREATION_DATE = "creationDate";
    private static final String KEY_CREATOR = "creator";
    private static final String KEY_DATE = "date";
    private static final String KEY_GENERATOR = "generator";
    private static final String KEY_INITIAL_CREATOR = "initialCreator";
    private static final String KEY_KEYWORD = "keyword";
    private static final String KEY_LANGUAGE = "language";
    // TODO: print date / printed-by are declared but not extracted yet; no
    // matching Tika metadata key has been identified for them.
    private static final String KEY_PRINT_DATE = "printDate";
    private static final String KEY_PRINTED_BY = "printedBy";

    /** Prefix under which user-defined document properties are looked up in the Tika metadata. */
    private static final String CUSTOM_PREFIX = "custom:";

    /**
     * Pattern used to parse date strings. Note the 24-hour {@code HH}: the
     * 12-hour {@code hh} would misread afternoon times such as 14:30.
     */
    private static final String DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";

    public static ArrayList<String> SUPPORTED_MIMETYPES = buildSupportedMimetypes(
            new String[] {
                MimetypeMap.MIMETYPE_WORD,
                MimetypeMap.MIMETYPE_EXCEL,
                MimetypeMap.MIMETYPE_PPT},
            new OfficeParser()
    );
    static {
        // Outlook has its own extracter, so never claim that mimetype here.
        SUPPORTED_MIMETYPES.remove(MimetypeMap.MIMETYPE_OUTLOOK_MSG);
    }

    public EnhancedOfficeExtracter()
    {
        super(SUPPORTED_MIMETYPES);
    }

    /**
     * @return a new {@link OfficeParser} on every call
     */
    @Override
    protected Parser getParser()
    {
        return new OfficeParser();
    }

    /**
     * Copies the values listed in the class documentation, plus any
     * <code>custom:</code>-prefixed values whose key appears in the mapping,
     * from the Tika metadata into the raw property map.
     *
     * @param metadata   the metadata produced by the parser
     * @param properties the raw property map to add to
     * @param headers    not used by this implementation
     * @return the same <code>properties</code> map that was passed in
     */
    @Override
    protected Map<String, Serializable> extractSpecific(Metadata metadata,
            Map<String, Serializable> properties, Map<String, String> headers)
    {
        putRawValue(KEY_CREATION_DATE, getDateOrNull(metadata.get(Metadata.CREATION_DATE)), properties);
        putRawValue(KEY_CREATOR, metadata.get(Metadata.CREATOR), properties);
        putRawValue(KEY_DATE, getDateOrNull(metadata.get(Metadata.DATE)), properties);
        putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.DESCRIPTION), properties);
        putRawValue(KEY_GENERATOR, metadata.get("generator"), properties);
        putRawValue(KEY_INITIAL_CREATOR, metadata.get("initial-creator"), properties);
        putRawValue(KEY_KEYWORD, metadata.get(Metadata.KEYWORDS), properties);
        putRawValue(KEY_LANGUAGE, metadata.get(Metadata.LANGUAGE), properties);

        // Handle user-defined properties dynamically: any mapping key that has a
        // "custom:<key>" entry in the Tika metadata is exposed under <key>.
        Map<String, Set<QName>> mapping = super.getMapping();
        for (String key : mapping.keySet())
        {
            String customValue = metadata.get(CUSTOM_PREFIX + key);
            if (customValue != null)
            {
                putRawValue(key, customValue, properties);
            }
        }
        return properties;
    }

    /**
     * Parses a date string of the form <code>yyyy-MM-dd'T'HH:mm:ss</code>.
     *
     * @param dateString the text to parse; may be <code>null</code> or empty
     * @return the parsed date, or <code>null</code> when the input is missing
     *         or does not match the pattern
     */
    private Date getDateOrNull(String dateString)
    {
        if (dateString == null || dateString.length() == 0)
        {
            return null;
        }
        try
        {
            // SimpleDateFormat is not thread-safe, so build one per call rather
            // than sharing a static instance between concurrent extractions.
            return new SimpleDateFormat(DATE_PATTERN).parse(dateString);
        }
        catch (ParseException ignored)
        {
            // Best effort: an unparseable date is treated as absent so that the
            // remaining metadata can still be extracted.
            return null;
        }
    }
}
#
# EnhancedOfficeExtracter - default mapping
#
# Loaded from the classpath as com/mpb/extracter/EnhancedOfficeExtracter.properties
#
# author: Derek Hulley
# Namespaces
# Every prefix used on the right-hand side of a mapping must be declared here,
# otherwise start-up fails with "No prefix mapping for extracter property mapping".
namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
namespace.prefix.custom=custom.model
# Mappings
creationDate=cm:created
creator=cm:author
date=
description=
generator=
initialCreator=
keyword=
language=
printDate=
printedBy=
subject=cm:description
title=cm:title
# mine
# The extracter looks these keys up in the document as "custom:<key>".
user1=custom:user1
Keywords=custom:keywords
<?xml version="1.0" encoding="UTF-8"?>
<!-- Custom Model -->
<!-- Note: This model is pre-configured to load at startup of the Repository. So, all custom -->
<!-- types and aspects added here will automatically be registered -->
<model name="custom:customModel" xmlns="http://www.alfresco.org/model/dictionary/1.0">
    <!-- Optional meta-data about the model -->
    <description>Custom Model</description>
    <author>Colin Sneddon</author>
    <version>1.0</version>
    <imports>
        <!-- Import Alfresco Dictionary Definitions -->
        <import uri="http://www.alfresco.org/model/dictionary/1.0" prefix="d"/>
        <!-- Import Alfresco Content Domain Model Definitions -->
        <import uri="http://www.alfresco.org/model/content/1.0" prefix="cm"/>
    </imports>
    <!-- Introduction of new namespaces defined by this model -->
    <!-- NOTE: The following namespace custom.model should be changed to reflect your own namespace -->
    <namespaces>
        <namespace uri="custom.model" prefix="custom"/>
    </namespaces>
    <aspects>
        <!-- Definition of new Content Aspect: myprojectbrowser generic forms -->
        <aspect name="custom:genericForm">
            <title>myForm</title>
            <properties>
                <property name="custom:keywords">
                    <title>keywords</title>
                    <type>d:text</type>
                    <protected>false</protected>
                    <mandatory>false</mandatory>
                    <multiple>false</multiple>
                </property>
                <property name="custom:user1">
                    <title>ProjectID</title>
                    <type>d:text</type>
                    <protected>false</protected>
                    <mandatory>false</mandatory>
                    <multiple>false</multiple>
                </property>
            </properties>
        </aspect>
    </aspects>
</model>
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<!--
    Registers the custom Office metadata extracter and extends its mapping.
    In addition to the default mappings inherited from the extracter's own
    properties file, the fields 'user1' and 'keywords' are mapped to
    'custom:user1' and 'custom:keywords'. The available source properties are
    described in the Javadoc of the extracter class.
-->
<beans>
    <!-- This adds in the extra mapping for the Office extracter -->
    <bean id="extracter.Office" class="com.mpb.extracter.EnhancedOfficeExtracter" parent="baseMetadataExtracter" >
        <property name="inheritDefaultMapping">
            <value>true</value>
        </property>
        <property name="mappingProperties">
            <props>
                <prop key="namespace.prefix.custom">custom.model</prop>
                <prop key="user1">custom:user1</prop>
                <prop key="keywords">custom:keywords</prop>
            </props>
        </property>
    </bean>
</beans>
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<beans>
    <!-- Registration of new models -->
    <bean id="mpb.dictionaryBootstrap" parent="dictionaryModelBootstrap" depends-on="dictionaryBootstrap">
        <property name="models">
            <list>
                <value>alfresco/extension/model/customModel.xml</value>
            </list>
        </property>
    </bean>
</beans>
<alfresco-config>
    <!-- Example of overriding the from email address -->
    <!--
    <config>
        <client>
            <from-email-address>someone@your-domain.com</from-email-address>
            <search-max-results>100</search-max-results>
        </client>
    </config>
    -->
    <!-- Example of adding languages to the list in the login page -->
    <!--
    <config evaluator="string-compare" condition="Languages">
        <languages>
            <language locale="ca_ES">Catalan</language>
            <language locale="hr_HR">Croatian</language>
            <language locale="cs_CZ">Czech</language>
            <language locale="da_DK">Danish</language>
            <language locale="de_DE">German</language>
            <language locale="es_ES">Spanish</language>
            <language locale="el_GR">Greek</language>
            <language locale="fi_FI">Finnish</language>
            <language locale="fr_FR">French</language>
            <language locale="it_IT">Italian</language>
            <language locale="ja_JP">Japanese</language>
            <language locale="du_NL">Dutch</language>
            <language locale="pl_PL">Polish</language>
            <language locale="pt_PT">Portuguese</language>
            <language locale="pt_BR">Portuguese (Brazilian)</language>
            <language locale="ru_RU">Russian</language>
            <language locale="sv_SV">Swedish</language>
            <language locale="tr_TR">Turkish</language>
            <language locale="zh_CN">Simplified Chinese</language>
        </languages>
    </config>
    -->
    <!-- Example of configuring advanced search -->
    <!--
    <config evaluator="string-compare" condition="Advanced Search">
        <advanced-search>
            <content-types>
            </content-types>
            <custom-properties>
                <meta-data aspect="app:simpleworkflow" property="app:approveStep" />
            </custom-properties>
        </advanced-search>
    </config>
    -->
    <!-- Example of changing the sort direction for a view in the client -->
    <!--
    <config evaluator="string-compare" condition="Views">
        <views>
            <view-defaults>
                <topic>
                    <sort-direction>ascending</sort-direction>
                </topic>
            </view-defaults>
        </views>
    </config>
    -->
    <!-- Example of adding a custom icon to the Create Space dialog -->
    <!--
    <config evaluator="string-compare" condition="cm:folder icons">
        <icons>
            <icon name="space-icon-custom" path="/images/icons/space-icon-custom.gif" />
        </icons>
    </config>
    -->
    <!-- The config below shows how to incorporate the example model -->
    <!-- into the web client, for this to work you will need to -->
    <!-- rename example-model-context.xml.sample to example-model-context.xml -->
    <!--
    <config evaluator="string-compare" condition="Content Wizards">
        <content-types>
            <type name="my:sop" />
        </content-types>
    </config>
    <config evaluator="node-type" condition="my:sop">
        <property-sheet>
            <show-property name="mimetype" display-label-id="content_type"
                           component-generator="MimeTypeSelectorGenerator" />
            <show-property name="size" display-label-id="size"
                           converter="org.alfresco.faces.ByteSizeConverter"
                           show-in-edit-mode="false" />
            <show-property name="my:publishedDate" />
            <show-association name="my:signOff" />
            <show-property name="my:authorisedBy" />
            <show-child-association name="my:processSteps" />
        </property-sheet>
    </config>
    <config evaluator="aspect-name" condition="my:imageClassification">
        <property-sheet>
            <show-property name="my:width"/>
            <show-property name="my:height"/>
            <show-property name="my:resolution"/>
        </property-sheet>
    </config>
    <config evaluator="aspect-name" condition="cm:storeSelector">
        <property-sheet>
            <show-property name="cm:storeName" />
        </property-sheet>
    </config>
    <config evaluator="string-compare" condition="Action Wizards">
        <aspects>
            <aspect name="cm:storeSelector"/>
        </aspects>
    </config>
    <config evaluator="string-compare" condition="Action Wizards">
        <aspects>
            <aspect name="my:imageClassification"/>
        </aspects>
    </config>
    <config evaluator="string-compare" condition="Advanced Search">
        <advanced-search>
            <content-types>
                <type name="my:sop" />
            </content-types>
            <custom-properties>
                <meta-data type="my:sop" property="my:authorisedBy" />
                <meta-data aspect="my:imageClassification" property="my:resolution" />
            </custom-properties>
        </advanced-search>
    </config>
    -->
    <!-- Lists the custom aspect in business rules Action wizard -->
    <config evaluator="string-compare" condition="Action Wizards">
        <aspects>
            <aspect name="custom:genericForm"/>
        </aspects>
    </config>
    <!-- Displays the properties in view details page -->
    <config evaluator="aspect-name" condition="custom:genericForm">
        <property-sheet>
            <separator name="sepCust1" display-label="My Forms" component-generator="HeaderSeparatorGenerator" />
            <show-property name="custom:keywords"/>
            <show-property name="custom:user1"/>
        </property-sheet>
    </config>
</alfresco-config>
07-21-2011 03:41 PM
07-21-2011 04:09 PM
07-22-2011 02:41 AM
07-22-2011 04:53 AM
07-22-2011 04:59 AM
07-22-2011 05:14 AM
07-22-2011 05:23 AM
10:20:52,117 INFO [org.alfresco.config.JndiPropertiesFactoryBean] Loading properties file from class path resource [alfresco/repository.properties]
10:20:52,124 INFO [org.alfresco.config.JndiPropertiesFactoryBean] Loading properties file from class path resource [alfresco/domain/transaction.properties]
10:20:52,125 INFO [org.alfresco.config.JndiPropertiesFactoryBean] Loading properties file from file [/opt/alfresco/tomcat/webapps/alfresco/WEB-INF/classes/alfresco/module/test/alfresco-global.properties]
10:20:52,125 INFO [org.alfresco.config.JndiPropertiesFactoryBean] Loading properties file from file [/opt/alfresco/tomcat/webapps/alfresco/WEB-INF/classes/alfresco/module/org_alfresco_module_dod5015/alfresco-global.properties]
10:20:52,125 INFO [org.alfresco.config.JndiPropertiesFactoryBean] Loading properties file from file [/opt/alfresco/tomcat/webapps/alfresco/WEB-INF/classes/alfresco/module/tests/alfresco-global.properties]
10:20:52,125 INFO [org.alfresco.config.JndiPropertiesFactoryBean] Loading properties file from URL [file:/opt/alfresco/tomcat/shared/classes/alfresco-global.properties]
10:20:52,189 INFO [org.alfresco.config.JndiPropertyPlaceholderConfigurer] Loading properties file from class path resource [alfresco/alfresco-shared.properties]
10:20:52,370 INFO [org.alfresco.config.FixedPropertyPlaceholderConfigurer] Loading properties file from class path resource [alfresco/version.properties]
10:20:52,432 INFO [org.alfresco.config.FixedPropertyPlaceholderConfigurer] Loading properties file from class path resource [alfresco/domain/cache-strategies.properties]
10:20:52,475 INFO [org.alfresco.config.FixedPropertyPlaceholderConfigurer] Loading properties file from class path resource [alfresco/module/org.alfresco.module.vti/context/vti.properties]
10:20:52,476 INFO [org.alfresco.config.FixedPropertyPlaceholderConfigurer] Loading properties file from URL [file:/opt/alfresco/tomcat/shared/classes/alfresco/extension/custom-vti.properties]
10:21:03,463 ERROR [org.springframework.web.context.ContextLoader] Context initialization failed
org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'extracter.Office' defined in file [/opt/alfresco/tomcat/webapps/alfresco/WEB-INF/classes/alfresco/extension/custom-metadata-extractors-context.xml]: Invocation of init method failed; nested exception is org.alfresco.error.AlfrescoRuntimeException: 06220001 Unable to load properties file to read extracter mapping properties:
Extracter: com.mpb.extracter.EnhancedOfficeExtracter@77edd56f
Bundle: com/mpb/extracter/EnhancedOfficeExtracter.properties
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1401)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:512)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:450)
at org.springframework.beans.factory.support.AbstractBeanFactory$1.getObject(AbstractBeanFactory.java:290)
at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:222)
at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:287)
at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:189)
at org.springframework.beans.factory.support.DefaultListableBeanFactory.preInstantiateSingletons(DefaultListableBeanFactory.java:557)
at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:842)
at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:416)
at org.springframework.web.context.ContextLoader.createWebApplicationContext(ContextLoader.java:261)
at org.springframework.web.context.ContextLoader.initWebApplicationContext(ContextLoader.java:192)
at org.springframework.web.context.ContextLoaderListener.contextInitialized(ContextLoaderListener.java:47)
at org.alfresco.web.app.ContextLoaderListener.contextInitialized(ContextLoaderListener.java:63)
at org.apache.catalina.core.StandardContext.listenerStart(StandardContext.java:3972)
at org.apache.catalina.core.StandardContext.start(StandardContext.java:4467)
at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:791)
at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:771)
at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:546)
at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:637)
at org.apache.catalina.startup.HostConfig.deployDescriptors(HostConfig.java:563)
at org.apache.catalina.startup.HostConfig.deployApps(HostConfig.java:498)
at org.apache.catalina.startup.HostConfig.start(HostConfig.java:1277)
at org.apache.catalina.startup.HostConfig.lifecycleEvent(HostConfig.java:321)
at org.apache.catalina.util.LifecycleSupport.fireLifecycleEvent(LifecycleSupport.java:119)
at org.apache.catalina.core.ContainerBase.start(ContainerBase.java:1053)
at org.apache.catalina.core.StandardHost.start(StandardHost.java:785)
at org.apache.catalina.core.ContainerBase.start(ContainerBase.java:1045)
at org.apache.catalina.core.StandardEngine.start(StandardEngine.java:443)
at org.apache.catalina.core.StandardService.start(StandardService.java:519)
at org.apache.catalina.core.StandardServer.start(StandardServer.java:710)
at org.apache.catalina.startup.Catalina.start(Catalina.java:581)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.catalina.startup.Bootstrap.start(Bootstrap.java:289)
at org.apache.catalina.startup.Bootstrap.main(Bootstrap.java:414)
Caused by: org.alfresco.error.AlfrescoRuntimeException: 06220001 Unable to load properties file to read extracter mapping properties:
Extracter: com.mpb.extracter.EnhancedOfficeExtracter@77edd56f
Bundle: com/mpb/extracter/EnhancedOfficeExtracter.properties
at org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter.readMappingProperties(AbstractMappingMetadataExtracter.java:411)
at org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter.getDefaultMapping(AbstractMappingMetadataExtracter.java:1017)
at org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter.init(AbstractMappingMetadataExtracter.java:531)
at org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter.register(AbstractMappingMetadataExtracter.java:514)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeCustomInitMethod(AbstractAutowireCapableBeanFactory.java:1529)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1468)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1398)
… 37 more
Caused by: org.alfresco.error.AlfrescoRuntimeException: 06220000 No prefix mapping for extracter property mapping:
Extracter: com.mpb.extracter.EnhancedOfficeExtracter@77edd56f
Mapping: creator=cm:author
at org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter.readMappingProperties(AbstractMappingMetadataExtracter.java:475)
at org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter.readMappingProperties(AbstractMappingMetadataExtracter.java:401)
… 47 more
10:21:10,502 INFO [org.springframework.extensions.webscripts.DeclarativeRegistry] Registered 305 Web Scripts (+0 failed), 315 URLs
10:21:10,503 INFO [org.springframework.extensions.webscripts.DeclarativeRegistry] Registered 8 Package Description Documents (+0 failed)
10:21:10,503 INFO [org.springframework.extensions.webscripts.DeclarativeRegistry] Registered 0 Schema Description Documents (+0 failed)
10:21:10,676 INFO [org.springframework.extensions.webscripts.AbstractRuntimeContainer] Initialised Spring Surf Container Web Script Container (in 2290.653ms)
10:21:10,761 INFO [org.springframework.extensions.webscripts.TemplateProcessorRegistry] Registered template processor freemarker for extension ftl
10:21:10,889 INFO [org.springframework.extensions.webscripts.ScriptProcessorRegistry] Registered script processor javascript for extension js
10:21:11,098 INFO [org.springframework.extensions.webscripts.TemplateProcessorRegistry] Registered template processor freemarker for extension ftl
10:21:11,101 INFO [org.springframework.extensions.webscripts.ScriptProcessorRegistry] Registered script processor javascript for extension js
10:21:11,300 INFO [org.springframework.extensions.webscripts.TemplateProcessorRegistry] Registered template processor freemarker for extension ftl
10:21:11,304 INFO [org.springframework.extensions.webscripts.ScriptProcessorRegistry] Registered script processor javascript for extension js
07-22-2011 05:35 AM
Tags
Find what you came for
We want to make your experience in Hyland Connect as valuable as possible, so we put together some helpful links.