<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Alfresco Developer Guide,  PdfBoxMetadataExtracter problem in Alfresco Archive</title>
    <link>https://connect.hyland.com/t5/alfresco-archive/alfresco-developer-guide-pdfboxmetadataextracter-problem/m-p/257317#M210447</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;I am following the guide in the book Alfresco Developer Guide but have encountered something I don't understand.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;In chapter 4, Digging into the developer class the book refers to &lt;/SPAN&gt;&lt;PRE class="language-none line-numbers"&gt;&lt;CODE&gt;org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter class&lt;SPAN class="line-numbers-rows"&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;BR /&gt;&lt;SPAN&gt;and describe the code below:&lt;/SPAN&gt;&lt;BR /&gt;&lt;PRE class="language-none line-numbers"&gt;&lt;CODE&gt;PDDocumentInformation docInfo = pdf.getDocumentInformation(); putRawValue(KEY_AUTHOR, docInfo.getAuthor(), rawProperties); putRawValue(KEY_TITLE, docInfo.getTitle(), rawProperties); putRawValue(KEY_SUBJECT, docInfo.getSubject(), rawProperties); Calendar created = docInfo.getCreationDate(); if (created != null) { putRawValue(KEY_CREATED, created.getTime(), rawProperties); }&lt;SPAN class="line-numbers-rows"&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;BR /&gt;&lt;SPAN&gt;The problem is I looked in the code in:&lt;/SPAN&gt;&lt;BR /&gt;&lt;PRE class="language-none line-numbers"&gt;&lt;CODE&gt;SDK AlfrescoEmbedded/source/&lt;A href="http://alfresco-repository-3.4.d.jar/org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter" rel="nofollow noopener noreferrer"&gt;alfresco-repository-3.4.d.jar/org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter&lt;/A&gt; class&lt;SPAN class="line-numbers-rows"&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;SPAN&gt; &lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;which contains the following code:&lt;/SPAN&gt;&lt;BR /&gt;&lt;PRE class="language-none line-numbers"&gt;&lt;CODE&gt;* Copyright (C) 2005 Jesper Steen Møller&lt;BR /&gt; *&lt;BR /&gt; * This file is part of Alfresco&lt;BR /&gt; *&lt;BR /&gt; * Alfresco 
is free software: you can redistribute it and/or modify&lt;BR /&gt; * it under the terms of the GNU Lesser General Public License as published by&lt;BR /&gt; * the Free Software Foundation, either version 3 of the License, or&lt;BR /&gt; * (at your option) any later version.&lt;BR /&gt; *&lt;BR /&gt; * Alfresco is distributed in the hope that it will be useful,&lt;BR /&gt; * but WITHOUT ANY WARRANTY; without even the implied warranty of&lt;BR /&gt; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.&amp;nbsp; See the&lt;BR /&gt; * GNU Lesser General Public License for more details.&lt;BR /&gt; *&lt;BR /&gt; * You should have received a copy of the GNU Lesser General Public License&lt;BR /&gt; * along with Alfresco. If not, see &amp;lt;&lt;A href="http://www.gnu.org/licenses/" rel="nofollow noopener noreferrer"&gt;http://www.gnu.org/licenses/&lt;/A&gt;&amp;gt;.&lt;BR /&gt; */&lt;BR /&gt;package org.alfresco.repo.content.metadata;&lt;BR /&gt;&lt;BR /&gt;import java.util.ArrayList;&lt;BR /&gt;&lt;BR /&gt;import org.alfresco.repo.content.MimetypeMap;&lt;BR /&gt;import org.apache.commons.logging.Log;&lt;BR /&gt;import org.apache.commons.logging.LogFactory;&lt;BR /&gt;import org.apache.tika.parser.Parser;&lt;BR /&gt;import org.apache.tika.parser.pdf.PDFParser;&lt;BR /&gt;&lt;BR /&gt;/**&lt;BR /&gt; * Metadata extractor for the PDF documents.&lt;BR /&gt; * &amp;lt;pre&amp;gt;&lt;BR /&gt; *&amp;nbsp;&amp;nbsp; &amp;lt;b&amp;gt;author:&amp;lt;/b&amp;gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; –&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cm:author&lt;BR /&gt; *&amp;nbsp;&amp;nbsp; &amp;lt;b&amp;gt;title:&amp;lt;/b&amp;gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; –&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
cm:title&lt;BR /&gt; *&amp;nbsp;&amp;nbsp; &amp;lt;b&amp;gt;subject:&amp;lt;/b&amp;gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; –&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cm:description&lt;BR /&gt; *&amp;nbsp;&amp;nbsp; &amp;lt;b&amp;gt;created:&amp;lt;/b&amp;gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; –&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cm:created&lt;BR /&gt; *&amp;nbsp;&amp;nbsp; &amp;lt;b&amp;gt;(custom metadata):&amp;lt;/b&amp;gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; –&lt;BR /&gt; * &amp;lt;/pre&amp;gt;&lt;BR /&gt; * &lt;BR /&gt; * Uses Apache Tika&lt;BR /&gt; * &lt;BR /&gt; * @author Jesper Steen Møller&lt;BR /&gt; * @author Derek Hulley&lt;BR /&gt; */&lt;BR /&gt;public class PdfBoxMetadataExtracter extends TikaPoweredMetadataExtracter&lt;BR /&gt;{&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; protected static Log pdfLogger = LogFactory.getLog(PdfBoxMetadataExtracter.class);&lt;BR /&gt;&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; public static ArrayList&amp;lt;String&amp;gt; SUPPORTED_MIMETYPES = buildSupportedMimetypes(&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; new String[] { MimetypeMap.MIMETYPE_PDF },&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; new PDFParser()&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; );&lt;BR /&gt;&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; public PdfBoxMetadataExtracter()&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; {&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; super(SUPPORTED_MIMETYPES);&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;BR 
/&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; @Override&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; protected Parser getParser() {&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; return new PDFParser();&lt;BR /&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&lt;BR /&gt;}&lt;BR /&gt;&lt;SPAN class="line-numbers-rows"&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;BR /&gt;&lt;SPAN&gt;I'm 
puzzled as I was expecting to see the code: PDDocumentInformation docInfo = pdf.g …&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Mon, 18 Jul 2011 09:37:49 GMT</pubDate>
    <dc:creator>col_edinburgh</dc:creator>
    <dc:date>2011-07-18T09:37:49Z</dc:date>
    <item>
      <title>Alfresco Developer Guide,  PdfBoxMetadataExtracter problem</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/alfresco-developer-guide-pdfboxmetadataextracter-problem/m-p/257317#M210447</link>
      <description>I am following the guide in the book Alfresco Developer Guide but have encountered something I don't understand. In chapter 4, Digging into the developer class the book refers to org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter class and describe the code below: PDDocumentInformation docIn</description>
      <pubDate>Mon, 18 Jul 2011 09:37:49 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/alfresco-developer-guide-pdfboxmetadataextracter-problem/m-p/257317#M210447</guid>
      <dc:creator>col_edinburgh</dc:creator>
      <dc:date>2011-07-18T09:37:49Z</dc:date>
    </item>
    <item>
      <title>Re: Alfresco Developer Guide,  PdfBoxMetadataExtracter problem</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/alfresco-developer-guide-pdfboxmetadataextracter-problem/m-p/257318#M210448</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Since that example was written, it looks like the "old" PDFBox metadata extractor has been replaced with an Apache Tika based extractor.&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;However, from the few lines you have given, it still looks like a good example, even though it does not match the current code.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 19 Jul 2011 07:06:56 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/alfresco-developer-guide-pdfboxmetadataextracter-problem/m-p/257318#M210448</guid>
      <dc:creator>mrogers</dc:creator>
      <dc:date>2011-07-19T07:06:56Z</dc:date>
    </item>
    <item>
      <title>Re: Alfresco Developer Guide,  PdfBoxMetadataExtracter problem</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/alfresco-developer-guide-pdfboxmetadataextracter-problem/m-p/257319#M210449</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;I'm now totally lost.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Using the examples in the Developer guide, I have imported into Eclipse the code from &lt;/SPAN&gt;&lt;PRE class="language-none line-numbers"&gt;&lt;CODE&gt;&lt;A href="http://www.packtpub.com/files/code/3117_Code.zip" rel="nofollow noopener noreferrer"&gt;http://www.packtpub.com/files/code/3117_Code.zip&lt;/A&gt;&lt;SPAN class="line-numbers-rows"&gt;&lt;SPAN&gt;‍&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;BR /&gt;&lt;SPAN&gt;run Ant Build on the code from Chapter 2 example and copied to Alfresco and it works.&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;run Ant Build on the code from Chapter 3 example and copied to Alfresco and it works.&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;run Ant Build on the code from Chapter 4 example and copied to Alfresco and it fails - http 404&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;one step forward and two back.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 19 Jul 2011 10:12:25 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/alfresco-developer-guide-pdfboxmetadataextracter-problem/m-p/257319#M210449</guid>
      <dc:creator>col_edinburgh</dc:creator>
      <dc:date>2011-07-19T10:12:25Z</dc:date>
    </item>
    <item>
      <title>Re: Alfresco Developer Guide,  PdfBoxMetadataExtracter problem</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/alfresco-developer-guide-pdfboxmetadataextracter-problem/m-p/257320#M210450</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;BLOCKQUOTE class="jive-quote"&gt;Since that example was written, it looks like the "old" PDFBox metadata extractor has been replaced with an Apache Tika based extractor.&lt;/BLOCKQUOTE&gt;&lt;BR /&gt;&lt;SPAN&gt;Thanks, back to the drawing board&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 19 Jul 2011 10:37:15 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/alfresco-developer-guide-pdfboxmetadataextracter-problem/m-p/257320#M210450</guid>
      <dc:creator>col_edinburgh</dc:creator>
      <dc:date>2011-07-19T10:37:15Z</dc:date>
    </item>
  </channel>
</rss>

