/*************************************************
Copyright 2009 ELEKA Ingeniaritza Linguistikoa

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
***************************************************/

#include "ocropusgui.h"

#include <wx/msgdlg.h>
#include <wx/intl.h>
#include <wx/string.h>
#include <wx/image.h>
#include <wx/filedlg.h>
#include <wx/file.h>
#include <wx/filesys.h>
#include <wx/stdpaths.h>
#include <wx/filename.h>
#include <wx/txtstrm.h>
#include <wx/log.h>

#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/xpathInternals.h>

#include <cctype>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <string>

// Command-line prefix used to start the OCR engine; MainDialog::OnOcrIt()
// appends the double-quoted image path to it before calling wxExecute().
#define OCROPUS_LAUNCH_CMD _T("ocroscript recognize --tesslanguage=eus ")
// Maximum value handed to the progress dialog. MainDialog::OnTimer() resets
// its counter to 1 just before this value would be reached.
#define PROCESS_STEP 150

// wxWidgets macro that declares OCRopusGUI as the application class.
IMPLEMENT_APP(OCRopusGUI)

bool OCRopusGUI::OnInit()
{
    //http://www.nabble.com/locale-in-isosurf-sample-td15068126.html
    //setlocale(LC_ALL, "C");

    wxInitAllImageHandlers();
    MainDialog dlg;
    SetTopWindow(&dlg);
    dlg.ShowModal();
    return false;
}

// Identifiers for the dialog's controls, its timer and the child process.
// Each one is obtained from wxNewId() during static initialisation.
const long MainDialog::ID_TEXTCTRL_IMAGE_PATH = wxNewId();
const long MainDialog::ID_BUTTON_BROWSE_IMAGE = wxNewId();
const long MainDialog::ID_TEXTCTRL_OUTFILE_PATH = wxNewId();
const long MainDialog::ID_BUTTON_BROWSE_OUTFILE = wxNewId();
const long MainDialog::ID_TIMER = wxNewId();
const long MainDialog::ID_PROCESS = wxNewId();
const long MainDialog::ID_CHECKBOX_APPLY_SPELL_CHECKER = wxNewId();
// Class-wide mutex taken by OnOcrIt() and OnTimer() around their accesses to
// dlgProgress. It is heap-allocated here and deleted in ~MainDialog().
wxMutex *MainDialog::mutex = new wxMutex();

// Static event table: routes button clicks and timer ticks to the
// MainDialog handlers defined further down in this file.
BEGIN_EVENT_TABLE(MainDialog, wxDialog)
    //(*EventTable(MainDialog)
    EVT_BUTTON(wxID_APPLY, MainDialog::OnOcrIt)
    EVT_BUTTON(wxID_HELP, MainDialog::OnAbout)
    EVT_BUTTON(ID_BUTTON_BROWSE_IMAGE, MainDialog::OnBrowseImage)
    EVT_BUTTON(ID_BUTTON_BROWSE_OUTFILE, MainDialog::OnBrowseOutputFile)
    EVT_TIMER(ID_TIMER, MainDialog::OnTimer)
    //*)
END_EVENT_TABLE()

/**
 * Builds the main dialog.
 *
 * Initialises libxml2, creates the two path rows (text field + browse
 * button), the spell-checker check box and the standard button row, and
 * finally tries to load the spell-checker data via InitSpellChecker().
 */
MainDialog::MainDialog()
{
    dlgProgress = NULL;
    timer.SetOwner(this, ID_TIMER);
    
    xmlInitParser();

    wxFlexGridSizer* boxMain;
    wxStaticBoxSizer* boxOutputPath;
    wxStaticBoxSizer* boxImagePath;
    wxStdDialogButtonSizer* boxButtons;
    
    Create(NULL, wxID_ANY, _("Tesseract-OCRopus euskaraz"), wxDefaultPosition, wxDefaultSize, wxDEFAULT_DIALOG_STYLE, _T("wxID_ANY"));
    SetClientSize(wxSize(142,212));
    boxMain = new wxFlexGridSizer(4, 1, 0, 0);

    // Row 1: path of the image to be recognised.
    boxImagePath = new wxStaticBoxSizer(wxHORIZONTAL, this, _("OCRtik pasa nahi den irudiaren kokapena"));
    txtImagePath = new wxTextCtrl(this, ID_TEXTCTRL_IMAGE_PATH, wxEmptyString, wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator, _T("ID_TEXTCTRL_IMAGE_PATH"));
    boxImagePath->Add(txtImagePath, 3, wxALL|wxEXPAND|wxALIGN_CENTER_HORIZONTAL|wxALIGN_CENTER_VERTICAL, wxDLG_UNIT(this,wxSize(5,0)).GetWidth());
    btnBrowseImage = new wxButton(this, ID_BUTTON_BROWSE_IMAGE, _("Arakatu"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator, _T("ID_BUTTON_BROWSE_IMAGE"));
    boxImagePath->Add(btnBrowseImage, 1, wxALL|wxALIGN_CENTER_HORIZONTAL|wxALIGN_CENTER_VERTICAL, 5);
    boxMain->Add(boxImagePath, 1, wxALL|wxEXPAND|wxALIGN_CENTER_HORIZONTAL|wxALIGN_CENTER_VERTICAL, 5);

    // Row 2: path of the result file. The text field uses its own id
    // (ID_TEXTCTRL_OUTFILE_PATH) so that it does not share an id with the
    // browse button next to it.
    boxOutputPath = new wxStaticBoxSizer(wxHORIZONTAL, this, _("Emaitza fitxategiaren kokapena"));
    txtHtmlFilePath = new wxTextCtrl(this, ID_TEXTCTRL_OUTFILE_PATH, wxEmptyString, wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator, _T("ID_TEXTCTRL_OUTFILE_PATH"));
    boxOutputPath->Add(txtHtmlFilePath, 3, wxALL|wxEXPAND|wxALIGN_CENTER_HORIZONTAL|wxALIGN_CENTER_VERTICAL, wxDLG_UNIT(this,wxSize(5,0)).GetWidth());
    btnBrowseHtmlFile = new wxButton(this, ID_BUTTON_BROWSE_OUTFILE, _("Arakatu"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator, _T("ID_BUTTON_BROWSE_OUTFILE"));
    boxOutputPath->Add(btnBrowseHtmlFile, 1, wxALL|wxALIGN_CENTER_HORIZONTAL|wxALIGN_CENTER_VERTICAL, 5);
    boxMain->Add(boxOutputPath, 1, wxALL|wxEXPAND|wxALIGN_CENTER_HORIZONTAL|wxALIGN_CENTER_VERTICAL, 5);

    // Row 3: opt-in spell-checker pass (off by default).
    chkApplySpellChecker = new wxCheckBox(this, ID_CHECKBOX_APPLY_SPELL_CHECKER, _("Zuzentzaile ortografikoa pasa"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator, _T("ID_CHECKBOX_APPLY_SPELL_CHECKER"));
    chkApplySpellChecker->SetValue(false);
    boxMain->Add(chkApplySpellChecker, 1, wxALL|wxALIGN_LEFT|wxALIGN_CENTER_VERTICAL, 5);

    // Row 4: Cancel / OCR / Help buttons.
    boxButtons = new wxStdDialogButtonSizer();
    boxButtons->AddButton(new wxButton(this, wxID_CANCEL, _("Utzi")));
    boxButtons->AddButton(new wxButton(this, wxID_APPLY, _("OCR")));
    boxButtons->AddButton(new wxButton(this, wxID_HELP, _("Laguntza")));
    boxButtons->Realize();
    boxMain->Add(boxButtons, 1, wxALL|wxEXPAND|wxALIGN_CENTER_HORIZONTAL|wxALIGN_CENTER_VERTICAL, 5);
    SetSizer(boxMain);
    boxMain->SetSizeHints(this);
    
    // Sets `lexicon` and disables the check box when the data is missing.
    InitSpellChecker();
}

/**
 * Releases libxml2 global state, the spell-checker and the shared mutex.
 */
MainDialog::~MainDialog()
{
    /* Shutdown libxml */
    xmlCleanupParser();

    /*
     * this is to debug memory for regression tests
     */
    xmlMemoryDump();

    // `delete` on a null pointer is a no-op, so no guard is needed.
    delete lexicon;
    lexicon = NULL;

    // `mutex` is a class-static pointer: reset it after deletion so that it
    // is never left dangling (and never deleted twice) if another
    // MainDialog instance is destroyed later.
    delete mutex;
    mutex = NULL;
}

void MainDialog::InitSpellChecker()
{
    wxFileName filename;

    //hiztegietara 
    if (getenv("TESSDATA_PREFIX"))
    {
        wxString path = wxString::FromUTF8(getenv("TESSDATA_PREFIX"));
        filename.AssignDir(path);
    }
    else
    {
#ifdef TESSDATA_PREFIX
#define _STR(a) #a
#define _XSTR(a) _STR(a)
        filename.AssignDir(_T(_XSTR(TESSDATA_PREFIX)));
#undef _XSTR
#undef _STR
#else
        wxString path = wxStandardPaths::Get().GetExecutablePath().BeforeLast(wxFileName::GetPathSeparator());
        filename.AssignDir(path);
#endif
    }

    filename.AppendDir(_T("tessdata"));
    
    wxFileName aff(filename.GetPath(wxPATH_GET_VOLUME | wxPATH_GET_SEPARATOR) + _T("eus.OCR.aff"));
    wxFileName dic(filename.GetPath(wxPATH_GET_VOLUME | wxPATH_GET_SEPARATOR) + _T("eus.OCR.dic"));
    wxFileName per(filename.GetPath(wxPATH_GET_VOLUME | wxPATH_GET_SEPARATOR) + _T("eus.pertsonala.dic"));
    wxFileName mut(filename.GetPath(wxPATH_GET_VOLUME | wxPATH_GET_SEPARATOR) + _T("eus.aldaketak.dic"));

    if (aff.FileExists() && dic.FileExists() && mut.FileExists())
    {
        lexicon = new Hunspell(aff.GetShortPath().fn_str(), dic.GetShortPath().fn_str());

        if (per.FileExists())
            lexicon->add_dic(per.GetShortPath().fn_str());

        LoadMutationFile(mut.GetShortPath().fn_str());
    }
    else
    {
        wxLogWarning(_("Ez dira aurkitu zuzentzaile ortografikoaren datu-fitxategiak.\nZuzentzaile ortografikoa pasatzeko aukera ezingo da aukeratu."));
        chkApplySpellChecker->SetValue(false);
        chkApplySpellChecker->Disable();
        lexicon = NULL;
    }
}

/**
 * Fills `reps` from a tab-separated mutation file.
 *
 * Each line that contains a tab contributes one entry: the text after the
 * first tab is the key and the text before it is the mapped value. Lines
 * without a tab are ignored. Nothing happens if the file cannot be opened.
 *
 * @param filename path of the mutation file.
 */
void MainDialog::LoadMutationFile(const char *filename)
{
    std::ifstream input(filename);

    if (!input.is_open())
        return;

    std::string row;
    while (std::getline(input, row))
    {
        const std::size_t tab = row.find('\t');

        if (tab == std::string::npos)
            continue;

        const std::string before = row.substr(0, tab);
        const std::string after = row.substr(tab + 1);
        reps.insert(std::pair<std::string,std::string>(after, before));
    }
}

/**
 * Post-processes the OCR output lines and writes them to the result file.
 *
 * The lines are first patched so that the document can be parsed (see the
 * comments in the loop), then parsed with libxml2. On success every <span>
 * is handed to Bbox2Style() and the resulting document is dumped to the
 * path in txtHtmlFilePath. If the document cannot be parsed or queried, the
 * patched text is saved as-is through the wxString overload.
 *
 * @param output lines produced by the OCR process; lines 2 and 3 may be
 *               modified in place.
 * @return true when the file was written, false otherwise.
 */
bool MainDialog::SaveToOutputFile(wxArrayString &output)
{
    // Undo what a bug in the hocr.lua file produces, so that XPath can be used.
    wxString buffer;
    for (size_t i = 0; i < output.GetCount(); i++ )
    {
        wxString &line = output.Item(i);

        if ((i == 2) && line.Strip(wxString::leading).StartsWith(_T("http")))
            line.Replace(_T("http"), _T("\"http"), false);
        if ((i == 3) && line.Strip(wxString::leading).StartsWith(_T("<html xmlns=\"http://www.w3.org/1999/xhtml\">")))
            line.Replace(_T("<html xmlns=\"http://www.w3.org/1999/xhtml\">"), _T("<html>"), false);
        
        buffer.Append(line);

        if (i == 1)
        {
            // wxString::Last() must not be called on an empty string, so an
            // empty (or all-blank) line is left untouched.
            const wxString trimmed = line.Strip();
            if (!trimmed.IsEmpty() && trimmed.Last() != '"')
                buffer.Append(_T("\""));
        }

        buffer.Append(_T("\n"));
    }

    // Try the HOCR -> HTML conversion.
    xmlDocPtr doc;
    xmlXPathContextPtr xpathCtx; 
    xmlXPathObjectPtr xpathObj; 

    /* Load XML document */
    // libxml2 assumes UTF-8 when the document declares no encoding, so the
    // text is converted explicitly (and only once) instead of relying on the
    // file-name encoding.
    const wxCharBuffer utf8 = buffer.mb_str(wxConvUTF8);
    const char *xmlText = utf8.data();

    doc = (NULL != xmlText) ? xmlParseMemory(xmlText, (int)strlen(xmlText)) : NULL;
    if (doc == NULL) {
        if (chkApplySpellChecker->GetValue())
            wxLogWarning(_("Ezin izan da emaitza kargatu zuzendu ahal izateko.\nBere hortan gordeko da."));

        return SaveToOutputFile(buffer);
    }

    /* Create xpath evaluation context */
    xpathCtx = xmlXPathNewContext(doc);
    if(xpathCtx == NULL) {
        if (chkApplySpellChecker->GetValue())
            wxLogWarning(_("Ezin izan da emaitza kargatu zuzendu ahal izateko.\nBere hortan gordeko da."));

        xmlFreeDoc(doc);
        return SaveToOutputFile(buffer);
    }

    /* Evaluate xpath expression */
    xpathObj = xmlXPathEvalExpression((const xmlChar*)"//span", xpathCtx);
    if(xpathObj == NULL) {
        if (chkApplySpellChecker->GetValue())
            wxLogWarning(_("Ezin izan da emaitza kargatu zuzendu ahal izateko.\n"));

        xmlXPathFreeContext(xpathCtx); 
        xmlFreeDoc(doc); 
        return SaveToOutputFile(buffer);
    }

    Bbox2Style(xpathObj->nodesetval);

    /* Cleanup of XPath data */
    xmlXPathFreeObject(xpathObj);
    xmlXPathFreeContext(xpathCtx); 
    
    // Save the file again.
    FILE *f = fopen(txtHtmlFilePath->GetValue().fn_str(), "w");
    if (NULL == f)
    {
        // Same message as the wxString overload uses when the file cannot
        // be created; fclose(NULL) must never be reached.
        wxLogError(_("Errorea!\n%s sortzeko baimenik ez duzu."), txtHtmlFilePath->GetValue().c_str());
        xmlFreeDoc(doc);
        return false;
    }

    const int written = xmlDocDump(f, doc);   // -1 on failure
    const bool closed = (0 == fclose(f));
    
    xmlFreeDoc(doc);
    return (written >= 0) && closed;
}

bool MainDialog::SaveToOutputFile(const wxString &output)
{
    //konprobatu fitxategia sor dezakegula
    wxFile file;
    
    if (!file.Create(txtHtmlFilePath->GetValue().c_str(), true))
    {
        wxLogError(_("Errorea!\n%s sortzeko baimenik ez duzu."), txtHtmlFilePath->GetValue().c_str());
        return false;
    }

    file.Write(output);
    file.Flush();
    file.Close();
    return true;
}

void MainDialog::Bbox2Style(xmlNodeSetPtr nodes)
{
    int size = (nodes) ? nodes->nodeNr : 0;
    size_t bufferlen = 1000;
    xmlChar *corrected = chkApplySpellChecker->GetValue() ? new xmlChar[bufferlen] : NULL;
    char *out = NULL;
    
    for(int i = size - 1; i >= 0; i--)
    {
        xmlNodePtr pSpanNode(nodes->nodeTab[i]);
        xmlChar *attrValue = xmlGetProp(pSpanNode, (xmlChar*)"title");
        char *bbox;
        
        if (NULL != attrValue)
        {
            if (NULL != (bbox = strstr((const char*)attrValue, "bbox")))
            {
                int x1, y1, x2, y2;
                bbox += 5;
            
                if (4 == sscanf(bbox, "%d %d %d %d", &x1, &y1, &x2, &y2))
                {
                    char styleValue[250];
                    sprintf(styleValue, "position:absolute; left:%dpx; top:%dpx;", x1, y1);  
                    xmlSetProp(pSpanNode, (xmlChar*)"style", (const xmlChar*)styleValue);
                }
            }

            xmlFree(attrValue);
        }

        if (!chkApplySpellChecker->GetValue())
            continue;

        //Zuzentzailea pasa OCR errore tipikoak zuzentzeko
        xmlChar *value = xmlNodeGetContent(nodes->nodeTab[i]);

        if (NULL == value)
            continue;

        if (strlen((const char*)value) * 5 > bufferlen)
        {
            delete [] corrected;
            bufferlen = strlen((const char*)value) * 5;
            corrected = new xmlChar[bufferlen];
        }

        memset(corrected, 0, bufferlen);
        
        //tokenetan banatu
        size_t idx = 0;
        size_t tokenStart = 0;
        size_t valuelen = strlen((const char*)value);
        while (idx < valuelen)
        {
            for (tokenStart = idx; idx < valuelen; idx++, tokenStart++)
            {
                if (!ispunct((char)value[idx]) && !isspace((char)value[idx]))
                   break;

                strncat((char*)corrected, (char*)value + idx, 1);
            }

            for(idx++; idx < valuelen; idx++)
            {
                if (ispunct((char)value[idx]) || isspace((char)value[idx]))
                   break;
            }

            if (tokenStart < valuelen)
            {
                xmlChar *token = new xmlChar[(idx + 1) - tokenStart];
                strncpy((char*)token, (char*)value + tokenStart, idx - tokenStart);
                token[idx - tokenStart] = 0;
                                
                if (!lexicon->spell((const char*)token))
                {
                    //fprintf(stderr, "%s\t", (const char*)token);
                    
                    out = NULL;

                    if (CorrectToken((const char*)token, 0, &out))
                    {
                        //fprintf(stderr, "%s", out);
                        strcat((char*)corrected, out);
                        delete [] out;
                    }
                    else
                    {
                        strcat((char*)corrected, (const char*)token);
                    }

                    //fprintf(stderr, "\n");
                }
                else
                    strcat((char*)corrected, (const char*)token);
                    
                delete [] token;
            }
        }

        xmlFree(value);
        xmlNodeSetContent(nodes->nodeTab[i], corrected);
    }

    if (NULL != corrected)
        delete [] corrected;
}

bool MainDialog::CorrectToken(const char *hitza, size_t pos, char **correctedWord)
{
    Reps::const_iterator iter;
    std::string rep, key;
    std::string hitzas(hitza);
    std::string right, left, berria;
    int non;

    if (pos <= hitzas.size())
    {
        for (iter = reps.begin();iter != reps.end(); ++iter)
        {
            key =  iter->first;
            rep = iter->second;
            non = hitzas.find(key,pos);
            if (non != std::string::npos)
            {
                left = hitzas.substr(0,non);
                right = hitzas.substr(non+(iter->first).size());
                berria = left + rep + right;

                if (lexicon->spell(berria.c_str()))
                {
                    *correctedWord = new char[berria.size() + 1];
                    strcpy(*correctedWord, berria.c_str());
                    return true;
                }

                if (CorrectToken(berria.c_str(), non+rep.size(), correctedWord))
                    return true;
            }         
        }

        return CorrectToken(hitza, pos+1, correctedWord);
    }

    return false;
}

/**
 * Reads `is` line by line until end of file and appends the lines to
 * `output`.
 *
 * @param is      stream to read from.
 * @param output  array that receives the lines.
 * @return true when end of file was reached, false when the stream reported
 *         another error first.
 */
bool MainDialog::ReadAllLines(wxInputStream &is, wxArrayString& output)
{
    wxTextInputStream tis(is);

    for (;;)
    {
        wxString line = tis.ReadLine();

        // Check for EOF before other errors: it is the normal way out.
        if (is.Eof())
        {
            // Keep the last line when the stream does not end with a
            // newline; it would otherwise be silently lost.
            if (!line.IsEmpty())
                output.Add(line);

            return true;
        }

        // Any other stream error aborts the read.
        if (!is)
            return false;

        output.Add(line);
    }
}

/**
 * Handles the streams of the finished OCR process.
 *
 * On error the captured error output (if any) is shown in an error
 * message. Otherwise the standard output is saved to the result file and,
 * if saving succeeded, the file is opened in the default browser.
 *
 * @param process  finished process whose output was redirected.
 * @param isError  true when the process ended with an error.
 */
void MainDialog::ProcessOutput(wxProcess &process, bool isError)
{
    wxArrayString output;
    
    if (isError)
    {
        if (process.IsErrorAvailable() && ReadAllLines(*process.GetErrorStream(), output))
        {
            wxString message = _("OCR prozesua errorearekin amaitu da:\n\n");
            
            for (size_t i = 0; i < output.GetCount(); i++)
            {
                message.Append(output.Item(i));
                message.Append(_T("\n"));
            }

            // The text comes from an external program and may contain '%':
            // never use it as the format string itself.
            wxLogError(_T("%s"), message.c_str());
        }
        else
        {
            wxLogError(_("OCR prozesua errore ezezagun batekin amaitu da."));
        }

        return;
    }

    if (!process.IsInputAvailable())
    {
        // Empty result without any error.
        wxLogWarning(_("OCR prozesuak ez du emaitzik itzuli."));
        return;
    }

    ReadAllLines(*process.GetInputStream(), output);

    // Do not open a file that could not be written.
    if (!SaveToOutputFile(output))
        return;

    // Open the file.
    wxFileName filename(txtHtmlFilePath->GetValue());
    wxLaunchDefaultBrowser(wxFileSystem::FileNameToURL(filename));
}

/**
 * Handler of the "OCR" button.
 *
 * Validates the two paths, shows a progress dialog driven by the timer,
 * runs the OCR command synchronously with its output redirected, passes
 * the result to ProcessOutput() and finally tears the progress dialog and
 * the process object down while holding the mutex.
 */
void MainDialog::OnOcrIt(wxCommandEvent& event)
{
    const wxString imagePath = txtImagePath->GetValue();
    const wxString outputPath = txtHtmlFilePath->GetValue();

    if (!wxFile::Exists(imagePath))
    {
        wxLogError(_("Kontuz!\nIrudi fitxategia ez da existitzen."));
        return;
    }

    // An existing result file must be writable. Whether a missing one can
    // be created is only found out when the result is saved.
    if (wxFile::Exists(outputPath) && !wxFile::Access(outputPath, wxFile::write))
    {
        wxLogError(_("Kontuz!\n%s berridazteko baimenik ez duzu."), outputPath.c_str());
        return;
    }

    // Build the command line: launcher prefix followed by the quoted path.
    wxString launchCommand(OCROPUS_LAUNCH_CMD);
    launchCommand << _T("\"") << imagePath << _T("\"");

    progress = 0;
    dlgProgress = new wxProgressDialog(_("OCR prozesua martxan"),
                                       _("Itxaron prozesua amaitu arte mesedez, luze jo baitezake."),
                                       PROCESS_STEP,
                                       this,
                                       wxPD_APP_MODAL);

    timer.Start(200, false);

    wxProcess *child = new wxProcess(this, ID_PROCESS);
    child->Redirect();

    const long exitCode = wxExecute(launchCommand, wxEXEC_SYNC, child);
    const bool failed = (exitCode != 0);
    ProcessOutput(*child, failed);

    timer.Stop();

    // Retry until the mutex is ours.
    while (mutex->Lock() != wxMUTEX_NO_ERROR)
    {
    }

    if (dlgProgress != NULL)
    {
        dlgProgress->Destroy();
        dlgProgress = NULL;
    }

    delete child;

    mutex->Unlock();
}

/**
 * Handler of the image "browse" button: lets the user pick an existing
 * PNG/JPG file and copies the chosen path into the image path field.
 */
void MainDialog::OnBrowseImage(wxCommandEvent& event)
{
    wxFileDialog picker(this,
                        _("Aukeratu irudi fitxategia"),
                        wxEmptyString,
                        wxEmptyString,
                        _T("Irudiak (*.png;*.jpg)|*.png;*.jpg"),
                        wxFD_DEFAULT_STYLE|wxFD_OPEN|wxFD_FILE_MUST_EXIST,
                        wxDefaultPosition,
                        wxDefaultSize,
                        _T("wxFileDialog"));

    // Nothing to do unless the selection was confirmed.
    if (picker.ShowModal() != wxID_OK)
        return;

    txtImagePath->SetValue(picker.GetPath());
}

/**
 * Handler of the result-file "browse" button: asks for the HTML file to
 * save to (with an overwrite prompt) and copies the chosen path into the
 * result path field.
 */
void MainDialog::OnBrowseOutputFile(wxCommandEvent& event)
{
    wxFileDialog picker(this,
                        _("Aukeratu irteera fitxategia"),
                        wxEmptyString,
                        wxEmptyString,
                        _T("HTML fitxategia (*.html;*.htm)|*.html;*.htm"),
                        wxFD_DEFAULT_STYLE|wxFD_SAVE|wxFD_OVERWRITE_PROMPT,
                        wxDefaultPosition,
                        wxDefaultSize,
                        _T("wxFileDialog"));

    // Nothing to do unless the selection was confirmed.
    if (picker.ShowModal() != wxID_OK)
        return;

    txtHtmlFilePath->SetValue(picker.GetPath());
}

/**
 * Handler of the "help" button: opens the online help page in the default
 * browser.
 */
void MainDialog::OnAbout(wxCommandEvent& event)
{
    const wxString helpUrl(_T("http://www.eleka.net/dokumentuak/ocr_laguntza.html"));
    wxLaunchDefaultBrowser(helpUrl);
}

/**
 * Timer tick: advances the progress dialog by one step.
 *
 * The tick is skipped when the mutex cannot be taken immediately. The
 * counter is reset to 1 once the next step would reach PROCESS_STEP.
 */
void MainDialog::OnTimer(wxTimerEvent &event)
{
    if (mutex->TryLock() != wxMUTEX_NO_ERROR)
        return;

    if (dlgProgress != NULL)
    {
        progress++;
        dlgProgress->Update(progress);

        if (PROCESS_STEP == progress + 1)
            progress = 1;
    }

    mutex->Unlock();
}

