<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: integrate tesseract ocr into alfresco in Alfresco Archive</title>
    <link>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309287#M262417</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Hi!&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;I'm evaluating Alfresco 5, and I haven't found any tutorial about tesseract integration on 5.0.x versions. How did you do the integration? &lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;Thanks in advance, &lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;J.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Sat, 09 Jan 2016 17:17:25 GMT</pubDate>
    <dc:creator>noferdito</dc:creator>
    <dc:date>2016-01-09T17:17:25Z</dc:date>
    <item>
      <title>integrate tesseract ocr into alfresco</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309286#M262416</link>
      <description>Hi, I integrated tesseract ocr into alfresco 5.0.d; it worked very well and supported the tiff, png and jpeg media formats. But I hope to save the detected text content into alfresco, so that it can be indexed by solr and searched. Is there any good solution for this use case? Thanks in advance. Tyshan</description>
      <pubDate>Tue, 03 Nov 2015 12:53:19 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309286#M262416</guid>
      <dc:creator>tyshan</dc:creator>
      <dc:date>2015-11-03T12:53:19Z</dc:date>
    </item>
    <item>
      <title>Re: integrate tesseract ocr into alfresco</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309287#M262417</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Hi!&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;I'm evaluating Alfresco 5, and I haven't found any tutorial about tesseract integration on 5.0.x versions. How did you do the integration? &lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;Thanks in advance, &lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;J.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 09 Jan 2016 17:17:25 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309287#M262417</guid>
      <dc:creator>noferdito</dc:creator>
      <dc:date>2016-01-09T17:17:25Z</dc:date>
    </item>
    <item>
      <title>Re: integrate tesseract ocr into alfresco</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309288#M262418</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;[Not the Op]&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;Did you try this tutorial by board member dougalscrp:&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt; &lt;/SPAN&gt;&lt;A href="http://www.seedim.com.au/content/alfresco-search-pdf-images-using-transformations-and-tesseract-ocr" rel="nofollow noopener noreferrer"&gt;http://www.seedim.com.au/content/alfresco-search-pdf-images-using-transformations-and-tesseract-ocr&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;I didn't integrate ocr into Alfresco myself yet, but it's on my todo-list.&lt;/SPAN&gt;&lt;BR /&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 29 Jan 2016 11:54:05 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309288#M262418</guid>
      <dc:creator>hello_wrold</dc:creator>
      <dc:date>2016-01-29T11:54:05Z</dc:date>
    </item>
    <item>
      <title>Re: integrate tesseract ocr into alfresco</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309289#M262419</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Hi Tyshan,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;The seedim.com.au tutorial should tell you how it works.&amp;nbsp; Basically, if you configure a transformation for each image mimetype (i.e. png, tiff, etc.) to text (I assume using the tesseract transform you have already configured) then when an image is uploaded solr will try to call the img-to-text transform you have configured to get the wordlist.&amp;nbsp; The wordlist is then automatically added to the solr index and points to the image content.&amp;nbsp;&amp;nbsp;&amp;nbsp; Searching will therefore find the image based on the text in the image.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Hope this helps.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Brian&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 03 Feb 2016 06:10:54 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309289#M262419</guid>
      <dc:creator>boneill</dc:creator>
      <dc:date>2016-02-03T06:10:54Z</dc:date>
    </item>
    <item>
      <title>Re: integrate tesseract ocr into alfresco</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309290#M262420</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Adding more information on OCR in alfresco.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;A href="http://www.krutikjayswal.com/2016/07/ocr-on-pdf-file-in-alfresco.html" rel="nofollow noopener noreferrer"&gt;http://www.krutikjayswal.com/2016/07/ocr-on-pdf-file-in-alfresco.html&lt;/A&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sun, 31 Jul 2016 18:09:25 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/integrate-tesseract-ocr-into-alfresco/m-p/309290#M262420</guid>
      <dc:creator>krutik_jayswal</dc:creator>
      <dc:date>2016-07-31T18:09:25Z</dc:date>
    </item>
  </channel>
</rss>

