<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Search inside Pdf files in Alfresco Archive</title>
    <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144236#M100873</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Do you see any errors in the logs?&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Any other users out there have the same issue?&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Thanks,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Kevin&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Tue, 03 Jun 2008 19:54:39 GMT</pubDate>
    <dc:creator>kevinr</dc:creator>
    <dc:date>2008-06-03T19:54:39Z</dc:date>
    <item>
      <title>Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144231#M100868</link>
      <description>Hi! I'm testing Alfresco 2.1 and 2.2 and I have a question about searching for content inside Pdf files. Using the Web Client I'm trying to search for some strings that are present inside Pdf files, but no Pdf file is returned from the query. The search is ok when I search for text that is present inside .</description>
      <pubDate>Thu, 04 Oct 2007 20:17:50 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144231#M100868</guid>
      <dc:creator>icarrara</dc:creator>
      <dc:date>2007-10-04T20:17:50Z</dc:date>
    </item>
    <item>
      <title>Re: Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144232#M100869</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;P.S.: Note that actually OpenOffice is NOT installed on the server where Alfresco is running.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 04 Oct 2007 20:19:05 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144232#M100869</guid>
      <dc:creator>icarrara</dc:creator>
      <dc:date>2007-10-04T20:19:05Z</dc:date>
    </item>
    <item>
      <title>Re: Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144233#M100870</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;If you open the PDF file in a PDF reader such as Acrobat, can you select the words as text to paste them elsewhere, or does it only offer to 'Copy Image'?&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;If it's the latter, the PDF has no text content that Alfresco can index, and you'd need to OCR the document first.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 10 Oct 2007 13:04:12 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144233#M100870</guid>
      <dc:creator>cricalix</dc:creator>
      <dc:date>2007-10-10T13:04:12Z</dc:date>
    </item>
    <item>
      <title>Re: Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144234#M100871</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;OpenOffice is not used for PDF to text conversion. We use a library called PDFBox for that. It works with 99% of PDFs we have tried - but as user 'cricalix' noted, it can only index plain text in the document.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Thanks,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Kevin&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 17 Oct 2007 13:45:16 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144234#M100871</guid>
      <dc:creator>kevinr</dc:creator>
      <dc:date>2007-10-17T13:45:16Z</dc:date>
    </item>
    <item>
      <title>Re: Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144235#M100872</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Hi !!!&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;I've deployed alfresco.war in a Tomcat environment, and I'm having the same situation: search doesn't work with .pdf files.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;My .pdf files contain text, so that I can copy their content…&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Can you help me????&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 03 Jun 2008 18:24:48 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144235#M100872</guid>
      <dc:creator>braulio_moura</dc:creator>
      <dc:date>2008-06-03T18:24:48Z</dc:date>
    </item>
    <item>
      <title>Re: Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144236#M100873</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Do you see any errors in the logs?&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Any other users out there have the same issue?&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Thanks,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Kevin&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 03 Jun 2008 19:54:39 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144236#M100873</guid>
      <dc:creator>kevinr</dc:creator>
      <dc:date>2008-06-03T19:54:39Z</dc:date>
    </item>
    <item>
      <title>Re: Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144237#M100874</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;we have an alfresco 3.0 labs with 270,000 documents PDF with ocr and alfresco does not search inside them&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;can someone please let us know if we have to activate something there?&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;regards&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;adrian cadena&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 11 Feb 2010 16:50:47 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144237#M100874</guid>
      <dc:creator>fugu</dc:creator>
      <dc:date>2010-02-11T16:50:47Z</dc:date>
    </item>
    <item>
      <title>Re: Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144238#M100875</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;I must ask have you turned on advanced search and&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;Content Format: adobe pdf document&amp;nbsp; type and&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;Show me results for: File names and contents&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;this works for me nice.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 03 Mar 2010 01:43:28 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144238#M100875</guid>
      <dc:creator>savic_prvoslav</dc:creator>
      <dc:date>2010-03-03T01:43:28Z</dc:date>
    </item>
    <item>
      <title>Re: Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144239#M100876</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;I don't know if this is related, but it might be if the PDF content being stored in Alfresco is generated by running the OpenOffice PDF converter.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;We use Alfresco to allow users to upload MS Office documents to an Alfresco DM repository in 2.1.1E which are converted to PDF by Alfresco using OpenOffice before being stored in the repository. The issue users are reporting is that trying to search inside the OpenOffice generated PDF's doesn't work as expected when viewing the PDF's in Adobe reader. The search results inside the Adobe Reader search don't take you to the correct location / content in the PDF.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Perhaps there is an issue with indexing of the PDF content by the Lucene search engine because of this?&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Clearly this won't apply if the PDF's are generated in another manner and then uploaded to the repo.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 11 Mar 2010 12:15:00 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144239#M100876</guid>
      <dc:creator>chrisb</dc:creator>
      <dc:date>2010-03-11T12:15:00Z</dc:date>
    </item>
    <item>
      <title>Re: Search inside Pdf files</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144240#M100877</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;You can try TextFinding.com to search inside pdf files. It is perfectly useful to search pdf for you. &lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 14 Jul 2014 03:13:59 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/search-inside-pdf-files/m-p/144240#M100877</guid>
      <dc:creator>yunda</dc:creator>
      <dc:date>2014-07-14T03:13:59Z</dc:date>
    </item>
  </channel>
</rss>

