<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Getting Search to include content of PDFs in Alfresco Archive</title>
    <link>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304197#M257327</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;I am a new user to Alfresco.&amp;nbsp; I am making a library database for my company and have found Alfresco to be useful and easy to use.&amp;nbsp; The problem I am encountering is: I have content uploaded to the site in the form of PDF.&amp;nbsp; When I do a search the search does not include the actual content of the PDFs but only includes results of the titles of the PDFs.&amp;nbsp; I read somewhere to enable an advanced search but can not figure this out.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;PLEASE HELP!!!&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Fri, 26 Apr 2013 19:39:10 GMT</pubDate>
    <dc:creator>kyle_moyer</dc:creator>
    <dc:date>2013-04-26T19:39:10Z</dc:date>
    <item>
      <title>Getting Search to include content of PDFs</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304197#M257327</link>
      <description>I am a new user to Alfresco.&amp;nbsp; I am making a library database for my company and have found Alfresco to be useful and easy to use.&amp;nbsp; The problem I am encountering is: I have content uploaded to the site in the form of PDF.&amp;nbsp; When I do a search the search does not include the actual content of the PDFs</description>
      <pubDate>Fri, 26 Apr 2013 19:39:10 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304197#M257327</guid>
      <dc:creator>kyle_moyer</dc:creator>
      <dc:date>2013-04-26T19:39:10Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Search to include content of PDFs</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304198#M257328</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;By default Alfresco will index the content of your PDF files. The requirement is that the PDF be text, not an image. There is nothing more you have to do.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Can you upload other types of files (like Word docs and text files) and search on the contents of those files?&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Jeff&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 26 Apr 2013 22:36:20 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304198#M257328</guid>
      <dc:creator>jpotts</dc:creator>
      <dc:date>2013-04-26T22:36:20Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Search to include content of PDFs</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304199#M257329</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;I have uploaded multiple Word documents and it appears that the search does include the content of the Word documents but not the .PDF – is there a way to convert the PDF to a "Text" PDF, or is there a way to tell which kind of PDF I have?&amp;nbsp; I do have OCR software and thought I scanned the documents in as searchable documents.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 29 Apr 2013 17:26:00 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304199#M257329</guid>
      <dc:creator>kyle_moyer</dc:creator>
      <dc:date>2013-04-29T17:26:00Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Search to include content of PDFs</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304200#M257330</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;See if any of the PDF tools referenced in this thread will help: &lt;/SPAN&gt;&lt;A href="http://stackoverflow.com/questions/1489733/how-to-know-if-a-pdf-contains-only-images-or-has-been-ocr-scanned-for-searching" rel="nofollow noopener noreferrer"&gt;http://stackoverflow.com/questions/1489733/how-to-know-if-a-pdf-contains-only-images-or-has-been-ocr-scanned-for-searching&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Jeff&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 29 Apr 2013 18:22:26 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304200#M257330</guid>
      <dc:creator>jpotts</dc:creator>
      <dc:date>2013-04-29T18:22:26Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Search to include content of PDFs</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304201#M257331</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Kyle,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;If one of those tools does not work, you can also use Acrobat to scan documents to make them searchable. When you scan a form normally it comes out as just an image file. Using Acrobat, you can scan and make the documents searchable.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;BW&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 30 Apr 2013 17:23:35 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304201#M257331</guid>
      <dc:creator>bwideman30</dc:creator>
      <dc:date>2013-04-30T17:23:35Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Search to include content of PDFs</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304202#M257332</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi Jeff,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I also have to capture a bunch of paper documents using KOFAX Capture, and I'd like to make the OCRised (fulltext) content available for searching, along with&amp;nbsp;"real" document, a PDF containing an image, available for download in alfresco and previewable in share.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Any simple solution to "manually" create the full-text indexes in solr, while still having the&amp;nbsp;"normal" PDF in the repository ?&lt;/P&gt;&lt;P&gt;Or should I use a custom property and include it in the default search pattern ?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 10 Apr 2018 08:48:51 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304202#M257332</guid>
      <dc:creator>jservajean</dc:creator>
      <dc:date>2018-04-10T08:48:51Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Search to include content of PDFs</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304203#M257333</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hello.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If your PDF files are already OCRised, there is nothing more you have to do.&lt;/P&gt;&lt;P&gt;Simply upload them into Alfresco and the content will be searchable.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;That is how it works by default.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 10 Apr 2018 14:34:04 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/getting-search-to-include-content-of-pdfs/m-p/304203#M257333</guid>
      <dc:creator>douglascrp</dc:creator>
      <dc:date>2018-04-10T14:34:04Z</dc:date>
    </item>
  </channel>
</rss>

