11-15-2010 12:29 PM
tesseract input_file.tif output_file.txt
You will get a file named output_file.txt.txt, because tesseract appends the .txt extension to the output name itself. Use this instead:
tesseract input_file.tif output_file -l eng
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<!--
  Registers an Alfresco content transformer that converts image/tiff to
  text/plain by running the external wrapper script /opt/alfresco/ocr.
-->
<beans>
  <!-- Worker that shells out to the OCR wrapper script. -->
  <bean id="transformer.worker.ocr.tiff" class="org.alfresco.repo.content.transform.RuntimeExecutableContentTransformerWorker">
    <property name="mimetypeService">
      <ref bean="mimetypeService" />
    </property>
    <!-- Command run without arguments; presumably an availability probe (confirm against Alfresco docs). -->
    <property name="checkCommand">
      <bean class="org.alfresco.util.exec.RuntimeExec">
        <property name="commandsAndArguments">
          <map>
            <!-- ".*" matches every operating-system name. -->
            <entry key=".*">
              <list>
                <!-- <value>tesseract</value> -->
                <value>/opt/alfresco/ocr</value>
              </list>
            </entry>
          </map>
        </property>
        <property name="errorCodes">
          <value>2</value>
        </property>
      </bean>
    </property>
    <!-- Command run for each transformation: wrapper script, source file, target file. -->
    <property name="transformCommand">
      <bean class="org.alfresco.util.exec.RuntimeExec">
        <property name="commandsAndArguments">
          <map>
            <entry key=".*">
              <list>
                <!-- Direct tesseract call, replaced by the wrapper script below:
                <value>tesseract</value>
                <value>${source}</value>
                <value>${target}</value>
                <value>-l</value>
                <value>eng</value>
                -->
                <value>/opt/alfresco/ocr</value>
                <value>${source}</value>
                <value>${target}</value>
              </list>
            </entry>
          </map>
        </property>
        <property name="errorCodes">
          <value>1,2</value>
        </property>
      </bean>
    </property>
    <!-- The only transformation this worker offers: TIFF image to plain text. -->
    <property name="explicitTransformations">
      <list>
        <bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails">
          <property name="sourceMimetype"><value>image/tiff</value></property>
          <property name="targetMimetype"><value>text/plain</value></property>
        </bean>
      </list>
    </property>
  </bean>
  <!-- Transformer bean that delegates to the worker defined above. -->
  <bean id="transformer.ocr.tiff" class="org.alfresco.repo.content.transform.ProxyContentTransformer" parent="baseContentTransformer">
    <property name="worker">
      <ref bean="transformer.worker.ocr.tiff" />
    </property>
  </bean>
</beans>
#!/bin/bash
# OCR wrapper: copies SOURCE to a temporary .tif file, runs tesseract on it
# and leaves the recognised text in TARGET.
#
# Usage: ocr SOURCE TARGET
#
# Exit status: tesseract's exit status, so the caller can detect a failed OCR
# run; 0 when called without arguments.

# The transformer configuration also invokes this script with no arguments
# (its checkCommand); keep that call harmless and successful.
if [ "$#" -lt 2 ]; then
    echo "usage: $0 SOURCE TARGET" >&2
    exit 0
fi

# save arguments to variables
SOURCE=$1
TARGET=$2
# Private name, so the environment's TMPDIR is not overwritten for child processes.
OCR_TMPDIR=/tmp
FILENAME=$(basename -- "$SOURCE")
# tesseract is given a file with a .tif extension, whatever SOURCE is called.
OCRFILE="$FILENAME.tif"

# to see what happens
#echo "from $SOURCE to $TARGET" >>/tmp/ocrtransform.log

cp -f -- "$SOURCE" "$OCR_TMPDIR/$OCRFILE" || exit 1

# tesseract appends .txt to the output base name itself, so pass TARGET
# with its extension stripped.
tesseract "$OCR_TMPDIR/$OCRFILE" "${TARGET%.*}" -l eng
STATUS=$?

rm -f -- "$OCR_TMPDIR/$OCRFILE"
exit "$STATUS"
12-07-2010 05:58 AM
Then made it an executable file (chmod 755 ocr)
01-20-2011 05:46 AM
02-03-2011 06:04 AM
02-19-2011 02:49 PM
02-27-2011 04:11 AM
03-01-2011 11:50 PM
06-17-2011 12:16 AM
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<!--
  Windows variant: registers a content transformer that converts image/tiff
  to text/plain by running C:\Alfresco\ocr.bat through cmd.exe.
-->
<beans default-lazy-init="false" default-autowire="no" default-dependency-check="none">

  <!-- Worker that shells out to the OCR batch file. -->
  <bean id="transformer.worker.ocr.tiff" class="org.alfresco.repo.content.transform.RuntimeExecutableContentTransformerWorker" lazy-init="default" autowire="default" dependency-check="default">
    <property name="mimetypeService">
      <ref bean="mimetypeService" />
    </property>

    <!-- Check command: lists the batch file with "dir". -->
    <property name="checkCommand">
      <bean class="org.alfresco.util.exec.RuntimeExec" lazy-init="default" autowire="default" dependency-check="default">
        <property name="commandsAndArguments">
          <map>
            <!-- Only operating-system names starting with "Windows" get a command. -->
            <entry key="Windows.*">
              <list>
                <value>C:\Windows\System32\cmd.exe</value>
                <value>/C</value>
                <value>dir c:\Alfresco\ocr.bat</value>
              </list>
            </entry>
          </map>
        </property>
        <property name="errorCodes">
          <value>1</value>
        </property>
      </bean>
    </property>

    <!-- Transform command: batch file, quoted source path, quoted target path. -->
    <property name="transformCommand">
      <bean class="org.alfresco.util.exec.RuntimeExec" lazy-init="default" autowire="default" dependency-check="default">
        <property name="commandsAndArguments">
          <map>
            <entry key="Windows.*">
              <list>
                <value>C:\Windows\System32\cmd.exe</value>
                <value>/C</value>
                <value>C:\Alfresco\ocr.bat</value>
                <value>"${source}"</value>
                <value>"${target}"</value>
              </list>
            </entry>
          </map>
        </property>
        <property name="errorCodes">
          <value>1,2</value>
        </property>
      </bean>
    </property>

    <!-- The only transformation this worker offers: TIFF image to plain text. -->
    <property name="explicitTransformations">
      <list>
        <bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
          <property name="sourceMimetype">
            <value>image/tiff</value>
          </property>
          <property name="targetMimetype">
            <value>text/plain</value>
          </property>
        </bean>
      </list>
    </property>
  </bean>

  <!-- Transformer bean that delegates to the worker defined above. -->
  <bean id="transformer.ocr.tiff" class="org.alfresco.repo.content.transform.ProxyContentTransformer" parent="baseContentTransformer" lazy-init="default" autowire="default" dependency-check="default">
    <property name="worker">
      <ref bean="transformer.worker.ocr.tiff" />
    </property>
  </bean>

</beans>
REM OCR wrapper for Windows. Usage: ocr.bat SOURCE TARGET
REM Copies SOURCE to C:\TMP, runs tesseract on the copy and writes the text
REM next to TARGET. Exits with tesseract's exit code.
setlocal
REM to see what happens
echo from %1 to %2 >>C:\tmp\ocrtransform.log
REM %~1 strips any surrounding quotes so the path can be re-quoted safely;
REM %~nx1 is the source file name plus extension.
copy /Y "%~1" "C:\TMP\%~nx1"
REM call tesseract; it appends .txt to the output base name itself, so pass
REM TARGET without its extension (%~dpn2 = drive + path + name)
"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe" "C:\TMP\%~nx1" "%~dpn2" -l fra
REM remember tesseract's result before del runs
set "OCR_RC=%ERRORLEVEL%"
del "C:\TMP\%~nx1"
exit /b %OCR_RC%
09-30-2011 04:05 AM
12-03-2011 02:57 PM
Tags
Find what you came for
We want to make your experience in Hyland Connect as valuable as possible, so we put together some helpful links.