01-18-2007 08:22 AM
08-30-2007 03:22 PM
08-30-2007 04:10 PM
12-18-2007 01:49 PM
I integrated the open-source OCR engines Tesseract and OCRopus from Google.
The code is not production-ready, but feel free to message me if you are interested.
Alexander
01-30-2008 09:43 PM
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<beans>

  <!--
    transformer.OCR: registers an Alfresco RuntimeExecutableContentTransformer that
    shells out to the external "ocrocmd" executable and is declared for the single
    explicit transformation image/jpeg to text/plain.
  -->
  <bean id="transformer.OCR"
        class="org.alfresco.repo.content.transform.RuntimeExecutableContentTransformer"
        parent="baseContentTransformer">

    <!--
      Command registered under the catch-all OS key ".*".
      NOTE(review): presumably used to probe whether ocrocmd is installed; confirm
      against RuntimeExecutableContentTransformer.
      The help flag is written with two ASCII hyphens. The earlier value carried a
      typographic en dash, which the executable would not recognise as an option.
    -->
    <property name="checkCommand">
      <bean class="org.alfresco.util.exec.RuntimeExec">
        <property name="commandMap">
          <map>
            <entry key=".*">
              <value>ocrocmd --help</value>
            </entry>
          </map>
        </property>
        <!-- Exit codes that are to be treated as a failed execution. -->
        <property name="errorCodes">
          <value>1,2,251</value>
        </property>
      </bean>
    </property>

    <!--
      Command used for the actual transformation; ${source} and ${target} are
      placeholders filled in at run time.
      NOTE(review): the command relies on ">" to redirect standard output into the
      target file. An execution log for this configuration reports
      "File is not valid: >", which suggests the command is not run through a shell
      and ">" reaches ocrocmd as a plain argument. Confirm, and consider a wrapper
      script or a transformer that captures standard output instead.
    -->
    <property name="transformCommand">
      <bean class="org.alfresco.util.exec.RuntimeExec">
        <property name="commandMap">
          <map>
            <entry key="Linux">
              <value>ocrocmd ${source} > ${target}</value>
            </entry>
            <entry key="Windows.*">
              <value>ocrocmd ${source} > ${target}</value>
            </entry>
          </map>
        </property>
        <!-- Exit codes that are to be treated as a failed execution. -->
        <property name="errorCodes">
          <value>1,2,251</value>
        </property>
      </bean>
    </property>

    <!-- The only source/target MIME type pair this transformer is declared for. -->
    <property name="explicitTransformations">
      <list>
        <bean class="org.alfresco.repo.content.transform.ContentTransformerRegistry$TransformationKey">
          <constructor-arg><value>image/jpeg</value></constructor-arg>
          <constructor-arg><value>text/plain</value></constructor-arg>
        </bean>
      </list>
    </property>
  </bean>

</beans>
11:27:04,312 User:xxxxxxxx DEBUG [util.exec.RuntimeExec] Execution result:
os: Linux
command: ocrocmd /opt/alfresco/tomcat/temp/Alfresco/RuntimeExecutableContentTransformer_source_62969.jpg > /opt/alfresco/tomcat/temp/Alfresco/RuntimeExecutableContentTransformer_target_62970.txt
succeeded: false
exit code: 251
out: <!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta name='ocr-system' content='OCRopus 0.1.1; Tue Jan 29 14:4
err: Ocropus Alpha (sauvola, rast, curved, tesseract)
0.1.1; Tue Jan 29 14:46:16 EST 2008; Linux singer 2.6.22-14-server #1 SMP Tue Dec 18 08:31:40 UTC 2007 i686 GNU/Linux
File is not valid: >
ocrocmd: file format not recognized
21:39:58,035 User:administrator DEBUG [util.exec.RuntimeExec] Execution result:
os: Linux
command: ocrocmd '/opt/alfresco/tomcat/temp/Alfresco/RuntimeExecutableContentTransformer_source_13877.jpg' > '/opt/alfresco/tomcat/temp/Alfresco/RuntimeExecutableContentTransformer_target_13878.txt'
succeeded: true
exit code: 0
out:
err: Ocropus Alpha (sauvola, rast, curved, tesseract)
0.1.1; Tue Jan 29 14:46:16 EST 2008; Linux singer 2.6.22-14-server #1 SMP Tue Dec 18 08:31:40 UTC 2007 i686 GNU/Linux
File is not valid: '/opt/alfresco/tomcat/temp/Alfresco/RuntimeExecutableContentTra
01-31-2008 03:11 PM
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<beans>
<!--
  transformer.Ocr.Png2Html: runs "ocropus ocr" on the source file and is declared
  for the explicit transformation image/png to text/html.
  NOTE(review): the transform command has no ${target} placeholder, so the custom
  class com.onepoint.transform.RuntimeExecutableOutContentTransformer presumably
  captures the command's standard output as the result; confirm against that class.
-->
<bean id="transformer.Ocr.Png2Html"
      class="com.onepoint.transform.RuntimeExecutableOutContentTransformer"
      parent="baseContentTransformer">

  <!--
    Command registered under the catch-all OS key ".*".
    NOTE(review): presumably used to probe whether ocropus is installed; confirm.
  -->
  <property name="checkCommand">
    <bean class="org.alfresco.util.exec.RuntimeExec">
      <property name="commandMap">
        <map>
          <entry key=".*">
            <value>ocropus</value>
          </entry>
        </map>
      </property>
      <!-- Exit codes that are to be treated as a failed execution. -->
      <property name="errorCodes">
        <value>1,2</value>
      </property>
    </bean>
  </property>

  <!--
    Transformation command. The OS key is a regular expression matched against the
    os.name system property (per the Alfresco RuntimeExec documentation), so it is
    written "Linux.*" like the other keys in this configuration style. The former
    "Linux*" meant "Linu" followed by any number of "x", which only matched by
    accident.
  -->
  <property name="transformCommand">
    <bean class="org.alfresco.util.exec.RuntimeExec">
      <property name="commandMap">
        <map>
          <entry key="Linux.*">
            <value>ocropus ocr ${source}</value>
          </entry>
        </map>
      </property>
      <!-- Exit codes that are to be treated as a failed execution. -->
      <property name="errorCodes">
        <value>1,2</value>
      </property>
    </bean>
  </property>

  <!-- The only source/target MIME type pair this transformer is declared for. -->
  <property name="explicitTransformations">
    <list>
      <bean class="org.alfresco.repo.content.transform.ContentTransformerRegistry$TransformationKey">
        <constructor-arg><value>image/png</value></constructor-arg>
        <constructor-arg><value>text/html</value></constructor-arg>
      </bean>
    </list>
  </property>
</bean>
<!--
  transformer.Ocr.Jpeg2Html: runs "ocropus ocr" on the source file and is declared
  for the explicit transformation image/jpeg to text/html.
  NOTE(review): the transform command has no ${target} placeholder, so the custom
  class com.onepoint.transform.RuntimeExecutableOutContentTransformer presumably
  captures the command's standard output as the result; confirm against that class.
-->
<bean id="transformer.Ocr.Jpeg2Html"
      class="com.onepoint.transform.RuntimeExecutableOutContentTransformer"
      parent="baseContentTransformer">

  <!--
    Command registered under the catch-all OS key ".*".
    NOTE(review): presumably used to probe whether ocropus is installed; confirm.
  -->
  <property name="checkCommand">
    <bean class="org.alfresco.util.exec.RuntimeExec">
      <property name="commandMap">
        <map>
          <entry key=".*">
            <value>ocropus</value>
          </entry>
        </map>
      </property>
      <!-- Exit codes that are to be treated as a failed execution. -->
      <property name="errorCodes">
        <value>1,2</value>
      </property>
    </bean>
  </property>

  <!--
    Transformation command. The OS key is a regular expression matched against the
    os.name system property (per the Alfresco RuntimeExec documentation), so it is
    written "Linux.*". The former "Linux*" meant "Linu" followed by any number of
    "x", which only matched by accident.
  -->
  <property name="transformCommand">
    <bean class="org.alfresco.util.exec.RuntimeExec">
      <property name="commandMap">
        <map>
          <entry key="Linux.*">
            <value>ocropus ocr ${source}</value>
          </entry>
        </map>
      </property>
      <!-- Exit codes that are to be treated as a failed execution. -->
      <property name="errorCodes">
        <value>1,2</value>
      </property>
    </bean>
  </property>

  <!-- The only source/target MIME type pair this transformer is declared for. -->
  <property name="explicitTransformations">
    <list>
      <bean class="org.alfresco.repo.content.transform.ContentTransformerRegistry$TransformationKey">
        <constructor-arg><value>image/jpeg</value></constructor-arg>
        <constructor-arg><value>text/html</value></constructor-arg>
      </bean>
    </list>
  </property>
</bean>
<!--
  transformer.Ocr.Tiff2Txt: uses the stock Alfresco
  RuntimeExecutableContentTransformer to run "tesseract" and is declared for the
  explicit transformation image/tiff to text/plain.
-->
<bean id="transformer.Ocr.Tiff2Txt"
      class="org.alfresco.repo.content.transform.RuntimeExecutableContentTransformer"
      parent="baseContentTransformer">

  <!--
    Command registered under the catch-all OS key ".*".
    NOTE(review): presumably used to probe whether tesseract is installed; confirm.
  -->
  <property name="checkCommand">
    <bean class="org.alfresco.util.exec.RuntimeExec">
      <property name="commandMap">
        <map>
          <entry key=".*">
            <value>tesseract</value>
          </entry>
        </map>
      </property>
      <!-- Exit codes that are to be treated as a failed execution. -->
      <property name="errorCodes">
        <value>1,2</value>
      </property>
    </bean>
  </property>

  <!--
    Transformation command. The OS key is a regular expression matched against the
    os.name system property (per the Alfresco RuntimeExec documentation), so it is
    written "Linux.*". The former "Linux*" meant "Linu" followed by any number of
    "x", which only matched by accident.
    NOTE(review): tesseract is documented to treat its second argument as an output
    base name and to append ".txt" itself; verify that the file named by ${target}
    is really the one that gets written.
  -->
  <property name="transformCommand">
    <bean class="org.alfresco.util.exec.RuntimeExec">
      <property name="commandMap">
        <map>
          <entry key="Linux.*">
            <value>tesseract ${source} ${target}</value>
          </entry>
        </map>
      </property>
      <!-- Exit codes that are to be treated as a failed execution. -->
      <property name="errorCodes">
        <value>1,2</value>
      </property>
    </bean>
  </property>

  <!-- The only source/target MIME type pair this transformer is declared for. -->
  <property name="explicitTransformations">
    <list>
      <bean class="org.alfresco.repo.content.transform.ContentTransformerRegistry$TransformationKey">
        <constructor-arg><value>image/tiff</value></constructor-arg>
        <constructor-arg><value>text/plain</value></constructor-arg>
      </bean>
    </list>
  </property>
</bean>
<!--
  transformer.complex.Jpeg2Text: a ComplexContentTransformer that chains two
  transformers in the listed order, with text/html declared as the intermediate
  MIME type between them. transformer.HtmlParser is referenced by id and is not
  defined in this file.
-->
<bean parent="baseContentTransformer"
      class="org.alfresco.repo.content.transform.ComplexContentTransformer"
      id="transformer.complex.Jpeg2Text">

  <!-- Stage 1: OCR of the JPEG to HTML. Stage 2: the referenced HTML parser. -->
  <property name="transformers">
    <list>
      <ref bean="transformer.Ocr.Jpeg2Html"/>
      <ref bean="transformer.HtmlParser"/>
    </list>
  </property>

  <!-- One intermediate MIME type for the hand-over between the two stages. -->
  <property name="intermediateMimetypes">
    <list>
      <value>text/html</value>
    </list>
  </property>
</bean>
<!--
  transformer.complex.Png2Text: a ComplexContentTransformer that chains two
  transformers in the listed order, with text/html declared as the intermediate
  MIME type between them. transformer.HtmlParser is referenced by id and is not
  defined in this file.
-->
<bean parent="baseContentTransformer"
      class="org.alfresco.repo.content.transform.ComplexContentTransformer"
      id="transformer.complex.Png2Text">

  <!-- Stage 1: OCR of the PNG to HTML. Stage 2: the referenced HTML parser. -->
  <property name="transformers">
    <list>
      <ref bean="transformer.Ocr.Png2Html"/>
      <ref bean="transformer.HtmlParser"/>
    </list>
  </property>

  <!-- One intermediate MIME type for the hand-over between the two stages. -->
  <property name="intermediateMimetypes">
    <list>
      <value>text/html</value>
    </list>
  </property>
</bean>
</beans>
07-02-2008 04:57 PM
09-12-2008 12:55 AM
09-12-2008 12:44 PM
09-13-2008 01:51 AM
Tags
Find what you came for
We want to make your experience in Hyland Connect as valuable as possible, so we put together some helpful links.