<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Extraction of content from PDF file in Alfresco Archive</title>
    <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290899#M244029</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Hi,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;actually it's already configured in alfresco. but we are not able to find where it is stored and we are not getting how we can get that metadata into some text file. so it would be helpful if you can provide screen shots for the same problem.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Ashwini&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Tue, 19 Mar 2013 13:43:14 GMT</pubDate>
    <dc:creator>ashwini_g_krish</dc:creator>
    <dc:date>2013-03-19T13:43:14Z</dc:date>
    <item>
      <title>Extraction of content from PDF file</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290897#M244027</link>
      <description>Hi, we are working on Alfresco, and our basic requirement is to extract metadata from a PDF file and also its content and save the extracted metadata and content in a text file like csv or .txt files. so kindly help me out with the same.</description>
      <pubDate>Mon, 18 Mar 2013 12:27:07 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290897#M244027</guid>
      <dc:creator>ashwini_g_krish</dc:creator>
      <dc:date>2013-03-18T12:27:07Z</dc:date>
    </item>
    <item>
      <title>Re: Extraction of content from PDF file</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290898#M244028</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;I think you need to create custom metadata extractor for your requirement.&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;Please refer following link and let me know if you have any doubts.&lt;/SPAN&gt;&lt;BR /&gt;&lt;A href="http://wiki.alfresco.com/wiki/Metadata_Extraction" rel="nofollow noopener noreferrer"&gt;http://wiki.alfresco.com/wiki/Metadata_Extraction&lt;/A&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 19 Mar 2013 11:52:05 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290898#M244028</guid>
      <dc:creator>mitpatoliya</dc:creator>
      <dc:date>2013-03-19T11:52:05Z</dc:date>
    </item>
    <item>
      <title>Re: Extraction of content from PDF file</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290899#M244029</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Hi,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;actually it's already configured in alfresco. but we are not able to find where it is stored and we are not getting how we can get that metadata into some text file. so it would be helpful if you can provide screen shots for the same problem.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Ashwini&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 19 Mar 2013 13:43:14 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290899#M244029</guid>
      <dc:creator>ashwini_g_krish</dc:creator>
      <dc:date>2013-03-19T13:43:14Z</dc:date>
    </item>
    <item>
      <title>Re: Extraction of content from PDF file</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290900#M244030</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Ashwini,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Ideally what metadata extractor does is it get the properties from the files which we are uploading and attach it to the newly created content in the alfresco as metadata.&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;Ex. &lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;When any word doc or pdf document lying in your c or d drive it has some set of properties like author,name,title etc… right?&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;When you upload that in to alfresco&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;metadataextractor comes in to picture and extract those property and attach it to the newly created file in alfresco as metadata of the file according to the model in alfresco.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;As per your requirement what you need to do is either extend the handler class of metadata extractor to achieve what you are looking for&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;or&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;create a script which read those property and create txt file (simpler approach)&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 20 Mar 2013 05:56:15 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290900#M244030</guid>
      <dc:creator>mitpatoliya</dc:creator>
      <dc:date>2013-03-20T05:56:15Z</dc:date>
    </item>
    <item>
      <title>Re: Extraction of content from PDF file</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290901#M244031</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Hi Mits,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Thank you for the reply. It will be helpful for us. i will create the script but the confusion is where to put the script and how we can run that script?? &lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;we are able to run the rules but we are not getting where the data is getting stored and can we specify the destination folder while creating rules.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 20 Mar 2013 09:29:00 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290901#M244031</guid>
      <dc:creator>ashwini_g_krish</dc:creator>
      <dc:date>2013-03-20T09:29:00Z</dc:date>
    </item>
    <item>
      <title>Re: Extraction of content from PDF file</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290902#M244032</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Put script in Data Dictionary&amp;gt;Scripts&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;Create a rule which will execute that script on arrival of pdf files.&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;That script should create txt file, read all metadata from that pdf file and put it in new file created.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 21 Mar 2013 07:35:59 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290902#M244032</guid>
      <dc:creator>mitpatoliya</dc:creator>
      <dc:date>2013-03-21T07:35:59Z</dc:date>
    </item>
    <item>
      <title>Re: Extraction of content from PDF file</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290903#M244033</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Thank you for the solution.&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;can u please provide me a simple script for getting the filename of uploaded file that works in alfresco?&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 21 Mar 2013 08:59:00 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290903#M244033</guid>
      <dc:creator>ashwini_g_krish</dc:creator>
      <dc:date>2013-03-21T08:59:00Z</dc:date>
    </item>
    <item>
      <title>Re: Extraction of content from PDF file</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290904#M244034</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;var filename=document.properties.name;&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;document object will be readily available in the context when you invoke the script via rule on document arrival.&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;It points to the current document which is getting uploaded.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 22 Mar 2013 06:56:00 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290904#M244034</guid>
      <dc:creator>mitpatoliya</dc:creator>
      <dc:date>2013-03-22T06:56:00Z</dc:date>
    </item>
    <item>
      <title>Re: Extraction of content from PDF file</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290905#M244035</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Thank you so much for the reply.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 22 Mar 2013 13:31:11 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extraction-of-content-from-pdf-file/m-p/290905#M244035</guid>
      <dc:creator>ashwini_g_krish</dc:creator>
      <dc:date>2013-03-22T13:31:11Z</dc:date>
    </item>
  </channel>
</rss>

