# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4

PortSystem          1.0
PortGroup           github 1.0
PortGroup           compiler_blacklist_versions 1.0

name                tesseract

# Select the upstream repository, version and checksums for the subport
# currently being processed.
#   * The engine (${name}) and ${name}-training are built from the main
#     tesseract repository.
#   * Every other subport fetches one of the three trained-data
#     repositories; the "best" and "fast" variants pick tessdata_best and
#     tessdata_fast, and plain tessdata is used when neither is set.
if {${subport} eq ${name} || ${subport} eq "${name}-training"} {
    github.setup    tesseract-ocr tesseract 4.1.3
    checksums       rmd160  e2037d64e5549ed0de715ef2d2b082149f3f9232 \
                    sha256  26ea123135c3e0b4f204cd097a9d43ab4293f23425e96128df48e6da1960b40f \
                    size    1975794
} else {
    if {[variant_isset best]} {
        github.setup    tesseract-ocr tessdata_best 4.1.0
        checksums       rmd160  753e227df9393d064a78d3243a5bc6fdce20f73c \
                        sha256  61e07fc0d2e52f0d1a41dc92a1542725f68ed3110f1df3bae715973cd8042851 \
                        size    1387835400
    } elseif {[variant_isset fast]} {
        github.setup    tesseract-ocr tessdata_fast 4.1.0
        checksums       rmd160  8163a934780db1b80edb94e5e1df45427aa4ff23 \
                        sha256  54d80714745d79955e7997fa0df6d3c43b1b1989179d5e64a67290a2eea8be64 \
                        size    352218148
    } else {
        github.setup    tesseract-ocr tessdata 4.1.0
        checksums       rmd160  f314b4fa97b64c5bc1c8318bbfce0484d63823d2 \
                        sha256  251c890578e37f6d829c706b7e3728c751e2b061ac57b549b14f8769137dcf0e \
                        size    668512655
    }
}

# Default port metadata. Subports declared later in this file replace
# description and long_description with their own text.
categories          textproc graphics pdf
platforms           darwin
license             Apache-2
maintainers         {mark @markemer} openmaintainer

description         Open source OCR engine
long_description    This package contains an OCR engine - libtesseract and a command \
                    line program - tesseract. Tesseract 4 adds a new neural net \
                    (LSTM) based OCR engine which is focused on line recognition, \
                    but also still supports the legacy Tesseract OCR engine of \
                    Tesseract 3 which works by recognizing character patterns.

# Livecheck is disabled for every subport; only the main port looks for new
# upstream releases, using a version pattern with a negative lookahead on
# "-rc" suffixes.
if {${subport} ne ${name}} {
    livecheck.type          none
} else {
    github.livecheck.regex  {(\d\.\d+(\.\d+)?(?!-rc))}
}

# The engine and training ports ship no language data themselves, so point
# the user at the per-language subports.
if {${subport} eq ${name} || ${subport} eq "${name}-training"} {
    notes {To use tesseract you must also install one of its language data subports. (ex tesseract-eng)}
}

# Training tools subport: uses the dedicated "training" build target and
# "training-install" destroot target, and needs cairo, icu and pango in
# addition to the libraries added for the engine elsewhere in this file.
subport ${name}-training {
    build.target            training
    destroot.target         training-install

    depends_lib-append      path:lib/pkgconfig/cairo.pc:cairo \
                            port:icu \
                            path:lib/pkgconfig/pango.pc:pango
}

if {${subport} eq ${name} || ${subport} eq "${name}-training"} {
    # ---- Engine and training tools: compiled from source ----

    # error: use of undeclared identifier '__cpuid_count'
    compiler.blacklist-append macports-clang-3.3 {clang < 503}

    # Regenerate the build system with the project's own autogen.sh;
    # autoreconf.args is set with no values so the script gets no arguments.
    use_autoreconf          yes
    autoreconf.cmd          ./autogen.sh
    autoreconf.args

    configure.args          --disable-silent-rules

    patchfiles              patch-fix-mtree-violation-by-cmake-files.diff

    depends_build-append    port:pkgconfig port:autoconf port:automake \
                            port:libtool port:asciidoc

    depends_lib-append      port:curl port:leptonica port:libarchive \
                            path:include/turbojpeg.h:libjpeg-turbo \
                            port:libpng port:zlib
} else {
    # ---- Data-only subports: no configure step and an empty build phase ----
    supported_archs         noarch
    use_configure           no
    build {}

    # Installing language data pulls in the engine port at run time.
    depends_run-append      port:${name}

    # Mutually exclusive variants; the source-selection logic earlier in the
    # file checks them with variant_isset to pick the data repository.
    variant best conflicts fast description {Use best training data} {
        notes-append "Legacy OCR Engine mode (--oem 0) is not supported by this variant"
    }

    variant fast conflicts best description {Use fast training data} {
        notes-append "Legacy OCR Engine mode (--oem 0) is not supported by this variant"
    }
}

# Language table: a flat Tcl list of <traineddata code> <display name> pairs.
# The code is the base name of the .traineddata file and (with "_" turned
# into "-") the subport suffix; the display name is interpolated into each
# subport's description. Names containing spaces must be brace-quoted so the
# list keeps its strict code/name pairing.
set langs {
    afr         Afrikaans
    amh         Amharic
    ara         Arabic
    asm         Assamese
    aze         Azerbaijani
    aze_cyrl    Azerbaijani-cyrillic
    bel         Belarusian
    ben         Bengali
    bod         Tibetan
    bos         Bosnian
    bul         Bulgarian
    cat         Catalan
    ceb         Cebuano
    ces         Czech
    chi_sim     Chinese-simple
    chi_tra     Chinese-traditional
    chr         Cherokee
    cym         Welsh
    dan         Danish
    deu         German
    dzo         Dzongkha
    ell         {Modern Greek}
    eng         English
    enm         {Middle English}
    epo         Esperanto
    est         Estonian
    eus         Basque
    fas         Persian
    fin         Finnish
    fra         French
    frm         {Middle French}
    gle         Irish
    glg         Galician
    grc         {Ancient Greek}
    guj         Gujarati
    hat         Haitian
    heb         Hebrew
    hin         Hindi
    hrv         Croatian
    hun         Hungarian
    iku         Inuktitut
    ind         Indonesian
    isl         Icelandic
    ita         Italian
    jav         Javanese
    jpn         Japanese
    kan         Kannada
    kat         Georgian
    kaz         Kazakh
    khm         {Central Khmer}
    kir         Kirghiz
    kor         Korean
    kmr         Kurdish
    lao         Lao
    lat         Latin
    lav         Latvian
    lit         Lithuanian
    mal         Malayalam
    mar         Marathi
    mkd         Macedonian
    mlt         Maltese
    msa         Malay
    mya         Burmese
    nep         Nepali
    nld         Dutch
    nor         Norwegian
    ori         Odiya
    osd         OSD
    pan         Panjabi
    pol         Polish
    por         Portuguese
    pus         Pushto
    ron         Romanian
    rus         Russian
    san         Sanskrit
    sin         Sinhala
    slk         Slovak
    slv         Slovenian
    spa         Spanish
    sqi         Albanian
    srp         Serbian
    srp_latn    Serbian-latin
    swa         Swahili
    swe         Swedish
    syr         Syriac
    tam         Tamil
    tel         Telugu
    tgk         Tajik
    tgl         Tagalog
    tha         Thai
    tir         Tigrinya
    tur         Turkish
    uig         Uighur
    ukr         Ukrainian
    urd         Urdu
    uzb         Uzbek
    uzb_cyrl    Uzbek-cyrillic
    vie         Vietnamese
    yid         Yiddish
}

# The Kurdish data file was renamed upstream from "kur" to "kmr"; this stub
# keeps the old subport name and redirects it to ${name}-kmr.
# Obsolete Date: 2021-11-29
subport ${name}-kur {
    # Load the port group before setting replaced_by.
    PortGroup           obsolete 1.0
    replaced_by         ${name}-kmr

    description         {Kurdish language data for the Tesseract OCR engine}
    long_description    {Kurdish language data for the Tesseract OCR engine}
}

# Declare one data-only subport per code/name pair in ${langs}.
# The subport name is ${name}-<code> with underscores in the code replaced
# by hyphens. The body is a double-quoted string, so every ${...} reference
# in it (language code and name, and the destroot/prefix/worksrcpath paths)
# is substituted while the loop runs, before the text is handed to subport.
# Each subport installs its single <code>.traineddata file into
# ${prefix}/share/tessdata inside the destroot.
foreach {lang_code lang_name} ${langs} {
    subport ${name}-[string map {_ -} ${lang_code}] "
        description         ${lang_name} language data for the Tesseract OCR engine
        long_description    ${lang_name} language data for the Tesseract OCR engine

        destroot {
            xinstall -d -m 0755 ${destroot}${prefix}/share/tessdata/
            xinstall -m 0644 ${worksrcpath}/${lang_code}.traineddata ${destroot}${prefix}/share/tessdata/
        }
    "
}
