<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: parsing .doc word content? in Alfresco Archive</title>
    <link>https://connect.hyland.com/t5/alfresco-archive/parsing-doc-word-content/m-p/212745#M165875</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;You can implement your own extracter in Alfresco:&lt;/SPAN&gt;&lt;BR /&gt;&lt;A href="http://wiki.alfresco.com/wiki/Metadata_Extraction" rel="nofollow noopener noreferrer"&gt;http://wiki.alfresco.com/wiki/Metadata_Extraction&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Maybe it could be useful to parse the .doc word file using Apache POI:&lt;/SPAN&gt;&lt;BR /&gt;&lt;A href="http://poi.apache.org/" rel="nofollow noopener noreferrer"&gt;http://poi.apache.org/&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Alfresco includes Apache POI 3.1, so you can start to implement your extracter without adding any other libraries&amp;nbsp; :wink: &lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Hope this helps.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Fri, 12 Mar 2010 11:29:45 GMT</pubDate>
    <dc:creator>openpj</dc:creator>
    <dc:date>2010-03-12T11:29:45Z</dc:date>
    <item>
      <title>parsing .doc word content?</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/parsing-doc-word-content/m-p/212744#M165874</link>
      <description>I need to parse the content of a .doc word file for metadata after the file is added to the Alfresco repository.&amp;nbsp; What is the best way of doing this?</description>
      <pubDate>Thu, 18 Feb 2010 13:06:08 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/parsing-doc-word-content/m-p/212744#M165874</guid>
      <dc:creator>targa2000</dc:creator>
      <dc:date>2010-02-18T13:06:08Z</dc:date>
    </item>
    <item>
      <title>Re: parsing .doc word content?</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/parsing-doc-word-content/m-p/212745#M165875</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;You can implement your own extracter in Alfresco:&lt;/SPAN&gt;&lt;BR /&gt;&lt;A href="http://wiki.alfresco.com/wiki/Metadata_Extraction" rel="nofollow noopener noreferrer"&gt;http://wiki.alfresco.com/wiki/Metadata_Extraction&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Maybe it could be useful to parse the .doc word file using Apache POI:&lt;/SPAN&gt;&lt;BR /&gt;&lt;A href="http://poi.apache.org/" rel="nofollow noopener noreferrer"&gt;http://poi.apache.org/&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Alfresco includes Apache POI 3.1, so you can start to implement your extracter without adding any other libraries&amp;nbsp; :wink: &lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Hope this helps.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 12 Mar 2010 11:29:45 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/parsing-doc-word-content/m-p/212745#M165875</guid>
      <dc:creator>openpj</dc:creator>
      <dc:date>2010-03-12T11:29:45Z</dc:date>
    </item>
  </channel>
</rss>

