Data Science Specialization Capstone Project (2/2): The Next Word App

This is my Capstone Project for the Data Science Specialization by Johns Hopkins University on Coursera.
The goal of the Capstone Project was to implement a useful model for predicting the next word given one or more words, and to develop a web app that uses it.
In the previous post you can find how I created the n-gram tables that this app uses.
Below is the code for the app, written using RStudio and Shiny; in addition to R there is a bit of HTML and CSS.
You can find the entire code on my Github.
You can try the app here.


ui.R file

library(shiny)

# ---- Page styling --------------------------------------------------------
# Inline stylesheet: gold text for the two prediction outputs (#word and
# #others), Calibri font, and a fixed full-page background image.
next_word_css <- '
         #word {
            color: gold;
            font-size: 300%;
         }
         #others {
            color: gold;
            font-size: 200%;
         }
        tabPanel{ 
          font-family: "Calibri";
          color: gold;
        }
        body, label, input, button, select { 
          font-family: "Calibri";
          background: #000000 url("http://massyfigini.github.io/assets/css/images/DocStrange.jpg")  bottom left;
          background-position: center center;
          background-repeat: no-repeat;
          background-attachment: fixed;
          background-size: cover;
        }'

# ---- Sidebar tabs --------------------------------------------------------
# First tab: free-text input plus the "Go!" button that triggers the
# prediction on the server side (input ids "Word" and "go").
next_word_input_tab <- tabPanel(
  title = HTML("<font color=black>Input</font>"),
  textInput(
    inputId = "Word",
    label = NULL,
    placeholder = "Write the words here..."
  ),
  actionButton(inputId = "go", label = "Go!", icon = icon("plane")),
  HTML("<br/><br/><br/><a href=http://www.massimilianofigini.com>&copy;massyfigini</a>")
)

# Second tab: static instructions and background information.
next_word_about_tab <- tabPanel(
  title = HTML("<font color=black>About the app</font>"),
  HTML("<br/><b>Instruction</b><br/>
This app predict the next word given one or more words.<br/>
In the input tab, you have to write one or more words and press the 'Go!' button, and the app will predict the next word.<br/>
<br/><b>Data</b><br/>
The English 'Corpora' data are the starting point for the algorithm, you can find more information 
<a href=https://web-beta.archive.org/web/20160930083655/http://www.corpora.heliohost.org/aboutcorpus.html>here</a>.
<br/><br/><b>Algorithm</b><br/>
The algorithm is created starting from the 'Corpora' data. The data are first divided in sentences, than I have
made the bigram, trigram and 4-gram data.
Every words you insert, the algorithm choose the most probabilities next word. You can find more information 
<a href=http://rpubs.com/massyfigini/NextWordApp>here</a>.
<br/><br/><a href=http://www.massimilianofigini.com>&copy;massyfigini</a>")
)

# ---- Main panel ----------------------------------------------------------
# Shows the top prediction (output id "word") and the alternatives
# (output id "others"), each under a white heading.
next_word_main_panel <- mainPanel(
  HTML("<font size=5 color=white>Top probability next word</font><br/>"),
  htmlOutput("word"),
  HTML("<br/><br/>"),
  HTML("<font size=5 color=white>Other possibly words</font><br/>"),
  htmlOutput("others")
)

# ---- Page assembly -------------------------------------------------------
shinyUI(
  fluidPage(
    tags$head(tags$style(HTML(next_word_css))),
    titlePanel(
      title = HTML("<font size=10 color=white><b><center>The Next Word App</center></b></font><br/>"),
      windowTitle = "The Next Word App"
    ),
    sidebarLayout(
      sidebarPanel(
        # Light grey background for the sidebar well.
        tags$style(".well {background-color:#d3d3d3;}"),
        tabsetPanel(next_word_input_tab, next_word_about_tab),
        width = 4
      ),
      next_word_main_panel
    )
  )
)

server.R file

library(shiny)
library(dplyr)
load("WordBigram.RData")
load("WordTrigram.RData")
load("WordQuadrigram.RData")

# Module-level default for the lookup status flag; "N" is the code used
# for "no prediction found".
Found <- "N"
# Comma-plus-newline separator string ("virgola" is Italian for "comma").
virgola <- ",\n"

# Split the raw text typed by the user into lower-case words.
#
# Leading/trailing whitespace is dropped and any run of whitespace counts as
# one separator, so stray spaces never produce empty "words" that would
# corrupt the n-gram lookup keys.
#
# @param text Character scalar (may be empty or NULL).
# @return Character vector of words; length 0 when nothing was typed.
split_words <- function(text) {
  cleaned <- trimws(tolower(text))
  words <- unlist(strsplit(cleaned, "[[:space:]]+"))
  words[nzchar(words)]
}

# Fetch the candidate next words stored for one n-gram prefix.
#
# @param ngram_table One of the loaded n-gram tables; must have the columns
#   Start, First, Second and Third.
# @param start_key Character scalar: the prefix to look up in `Start`.
# @return The matching rows restricted to First/Second/Third (zero rows when
#   the prefix is unknown).
lookup_next_words <- function(ngram_table, start_key) {
  ngram_table %>%
    filter(Start == start_key) %>%
    select(First, Second, Third)
}

# Back-off search: 4-gram table first (needs >= 3 words), then trigram
# (needs >= 2 words), then bigram. The bigram candidates for the last word
# are always returned as well because they feed the "other words" output.
#
# @param words Character vector as returned by split_words().
# @return A list with
#   source - "Q", "T", "B" (table that produced the match) or "N" (no match),
#   best   - rows from the matching table (NULL when source is "N"),
#   bigram - bigram rows for the last word (may have zero rows).
find_prediction <- function(words) {
  n_words <- length(words)
  if (n_words == 0L) {
    return(list(source = "N", best = NULL, bigram = NULL))
  }

  bigram <- lookup_next_words(WordBigram, words[n_words])

  if (n_words > 2L) {
    quad_key <- paste(words[(n_words - 2L):n_words], collapse = " ")
    quad <- lookup_next_words(WordQuadrigram, quad_key)
    if (nrow(quad) > 0L) {
      return(list(source = "Q", best = quad, bigram = bigram))
    }
  }

  if (n_words >= 2L) {
    tri_key <- paste(words[(n_words - 1L):n_words], collapse = " ")
    tri <- lookup_next_words(WordTrigram, tri_key)
    if (nrow(tri) > 0L) {
      return(list(source = "T", best = tri, bigram = bigram))
    }
  }

  if (nrow(bigram) > 0L) {
    return(list(source = "B", best = bigram, bigram = bigram))
  }
  list(source = "N", best = NULL, bigram = bigram)
}

# First value of a one-column data frame as a plain character scalar.
# Yields NA for a zero-row frame, which callers must test with is.na().
first_value <- function(column_df) {
  as.character(column_df[[1]][1])
}

# Print a one-column data frame without header and row names (the text
# captured by renderPrint and shown in the htmlOutput elements).
print_word <- function(column_df) {
  print(unname(column_df), row.names = FALSE)
}

shinyServer(function(input, output) {

  # Everything is recomputed, and both outputs are re-registered, each time
  # the "Go!" button is pressed; typing alone does not trigger a prediction.
  observeEvent(input$go, {
    prediction <- find_prediction(split_words(input$Word))

    # Top prediction: first candidate of the highest-order table that matched.
    output$word <- renderPrint({
      if (prediction$source == "N") {
        HTML("<font size=5 color=red>Next word not found!</font>")
      } else {
        print_word(prediction$best[1])
      }
    })

    # Alternatives: the bigram candidates for the last word, skipping the one
    # already shown as top prediction. Missing candidates (last word absent
    # from the bigram table, or NA entries) are skipped instead of being
    # compared, because `if (NA)` is an error in R.
    output$others <- renderPrint({
      if (prediction$source == "N") {
        HTML("<font size=5 color=red>Words not found!</font>")
      } else {
        top_word <- first_value(prediction$best[1])
        candidates <- lapply(seq_len(3L), function(i) prediction$bigram[i])
        usable <- vapply(
          candidates,
          function(candidate) {
            value <- first_value(candidate)
            !is.na(value) && !isTRUE(value == top_word)
          },
          logical(1)
        )
        alternatives <- candidates[usable]

        if (length(alternatives) == 0L) {
          HTML("<font size=5 color=red>Words not found!</font>")
        } else {
          # Separator goes between entries only, so no dangling comma is
          # left when the last candidate is skipped.
          for (i in seq_along(alternatives)) {
            if (i > 1L) {
              cat(",\n")
            }
            print_word(alternatives[[i]])
          }
        }
      }
    })

  })
})