﻿Imports Lucene.Net.Analysis.Standard
Imports Lucene.Net.Index
Imports Lucene.Net.QueryParsers.Classic
Imports Lucene.Net.Search
Imports Lucene.Net.Store
Imports Lucene.Net.Util
Imports System.IO
Imports System.Data
Imports DocumentFormat.OpenXml.Packaging

Imports LuceneDocument = Lucene.Net.Documents.Document
Imports LuceneQuery = Lucene.Net.Search.Query
Imports LuceneField = Lucene.Net.Documents.Field

''' <summary>
''' Line-oriented full-text search over .txt and .docx files, backed by a Lucene.NET index on disk.
''' </summary>
''' <remarks>
''' Every non-blank line of a .txt file (and every non-blank paragraph of a .docx file) is stored as
''' its own Lucene document with three fields: "path" and "filename" (stored, not analyzed) and
''' "line" (stored and analyzed with <see cref="StandardAnalyzer"/>).
''' </remarks>
Public Class LuceneSearch
    Public Shared ReadOnly LUCENE_VERSION As LuceneVersion = LuceneVersion.LUCENE_48
    Public Shared ReadOnly INDEX_PATH As String = "C:\MyLuceneIndex"

    ' Change this to your documents folder (the documents in it are the ones indexed and searched by this project).
    ' NOTE(review): nothing in this class reads ALLOWED_ROOT; presumably callers validate paths against it - confirm.
    Public Shared ALLOWED_ROOT As String = "C:\CD_ROOT\TESTDOCS"

    ' vbCrLf is listed first so a CR+LF pair is consumed as a single separator.
    Private Shared ReadOnly LineSeparators As String() = {vbCrLf, vbLf, vbCr}

    ' Characters that separate individual search words in the Search() arguments.
    Private Shared ReadOnly TermSeparators As Char() = {" "c, ControlChars.Tab, ControlChars.Cr, ControlChars.Lf}

    ''' <summary>
    ''' Rebuilds the index at <see cref="INDEX_PATH"/> from the .txt and .docx files found directly
    ''' inside <paramref name="sourceFolder"/> (sub-folders are not scanned).
    ''' </summary>
    ''' <param name="sourceFolder">Folder whose files are indexed.</param>
    ''' <remarks>
    ''' The previous index content is replaced. Files that cannot be read are skipped with a trace
    ''' warning. If indexing itself fails, the writer is rolled back so the previously committed
    ''' index is kept, and the exception is rethrown.
    ''' </remarks>
    Public Shared Sub BuildIndex(sourceFolder As String)
        ' Enumerate BEFORE opening the writer: an invalid folder must fail before DeleteAll() runs,
        ' otherwise disposing the writer would persist an emptied index.
        Dim files As String() = System.IO.Directory.GetFiles(sourceFolder, "*.*")

        Using dir As FSDirectory = FSDirectory.Open(New DirectoryInfo(INDEX_PATH))
            Using analyzer As New StandardAnalyzer(LUCENE_VERSION)
                Dim config As New IndexWriterConfig(LUCENE_VERSION, analyzer)

                Using writer As New IndexWriter(dir, config)
                    Try
                        writer.DeleteAll()

                        For Each filePath In files
                            Dim textContent As String = ReadFileText(filePath)
                            If String.IsNullOrWhiteSpace(textContent) Then Continue For

                            Dim fileName As String = Path.GetFileName(filePath)
                            Dim lines = textContent.Split(LineSeparators, StringSplitOptions.RemoveEmptyEntries)

                            For Each line In lines
                                ' Whitespace-only lines carry no searchable terms; do not index them.
                                If String.IsNullOrWhiteSpace(line) Then Continue For

                                Dim luceneDoc As New LuceneDocument()
                                luceneDoc.Add(New Lucene.Net.Documents.StringField("path", filePath, LuceneField.Store.YES))
                                luceneDoc.Add(New Lucene.Net.Documents.StringField("filename", fileName, LuceneField.Store.YES))
                                luceneDoc.Add(New Lucene.Net.Documents.TextField("line", line, LuceneField.Store.YES))
                                writer.AddDocument(luceneDoc)
                            Next
                        Next

                        writer.Commit()
                    Catch
                        ' Discard the uncommitted DeleteAll/AddDocument work so the old index survives.
                        writer.Rollback()
                        Throw
                    End Try
                End Using
            End Using
        End Using
    End Sub

    ''' <summary>
    ''' Returns the text of a .txt or .docx file, or an empty string for any other extension or
    ''' when the file cannot be read.
    ''' </summary>
    Private Shared Function ReadFileText(filePath As String) As String
        Dim ext As String = Path.GetExtension(filePath)

        Try
            If String.Equals(ext, ".txt", StringComparison.OrdinalIgnoreCase) Then
                Return File.ReadAllText(filePath)
            End If

            If String.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase) Then
                Return ReadDocxText(filePath)
            End If
        Catch ex As Exception
            ' Best effort: one locked or corrupt file must not abort the whole index build.
            System.Diagnostics.Trace.TraceWarning("LuceneSearch: skipped '" & filePath & "': " & ex.Message)
        End Try

        Return ""
    End Function

    ''' <summary>
    ''' Extracts the text of a .docx file with one line per paragraph.
    ''' </summary>
    ''' <remarks>
    ''' Body.InnerText concatenates all paragraphs without any separator, which would fuse the last
    ''' word of one paragraph with the first word of the next; paragraphs are therefore read one by one.
    ''' </remarks>
    Private Shared Function ReadDocxText(filePath As String) As String
        Dim text As New System.Text.StringBuilder()

        Using doc As WordprocessingDocument = WordprocessingDocument.Open(filePath, False)
            Dim mainPart = doc.MainDocumentPart
            If mainPart Is Nothing OrElse mainPart.Document Is Nothing OrElse mainPart.Document.Body Is Nothing Then
                Return ""
            End If

            For Each para In mainPart.Document.Body.Descendants(Of DocumentFormat.OpenXml.Wordprocessing.Paragraph)()
                text.Append(para.InnerText).Append(vbLf)
            Next
        End Using

        Return text.ToString()
    End Function

    ''' <summary>
    ''' Searches the "line" field of the index at <see cref="INDEX_PATH"/>.
    ''' </summary>
    ''' <param name="mustWords">Whitespace-separated words that must ALL match.</param>
    ''' <param name="orWords">Whitespace-separated words of which at least one should match; optional when <paramref name="mustWords"/> is given.</param>
    ''' <param name="notWords">Whitespace-separated words that must NOT match.</param>
    ''' <param name="maxResults">Maximum number of rows to return; a value of zero or less yields an empty table.</param>
    ''' <returns>A table with the columns "filename", "path" and "line", one row per hit.</returns>
    ''' <remarks>
    ''' Each word is parsed with the classic Lucene query parser, so query syntax such as wildcards
    ''' still works. A word that is not valid query syntax is retried with its special characters escaped.
    ''' </remarks>
    Public Shared Function Search(mustWords As String, orWords As String, notWords As String, maxResults As Integer) As DataTable
        Dim dt As New DataTable()
        dt.Columns.Add("filename")
        dt.Columns.Add("path")
        dt.Columns.Add("line")

        ' Lucene's top-docs collector requires a positive hit count.
        If maxResults <= 0 Then Return dt

        Using dir As FSDirectory = FSDirectory.Open(New DirectoryInfo(INDEX_PATH))
            Using reader = DirectoryReader.Open(dir)
                Using analyzer As New StandardAnalyzer(LUCENE_VERSION)
                    Dim searcher = New IndexSearcher(reader)
                    Dim parser As New MultiFieldQueryParser(LUCENE_VERSION, {"line"}, analyzer)

                    Dim booleanQuery As New BooleanQuery()
                    Dim hasPositiveClause As Boolean = False

                    Dim mustTerms = SplitTerms(mustWords)
                    If mustTerms.Length > 0 Then
                        Dim mustQuery As New BooleanQuery()
                        For Each w In mustTerms
                            mustQuery.Add(ParseTerm(parser, w), Occur.MUST)
                        Next
                        booleanQuery.Add(mustQuery, Occur.MUST)
                        hasPositiveClause = True
                    End If

                    Dim orTerms = SplitTerms(orWords)
                    If orTerms.Length > 0 Then
                        Dim orQuery As New BooleanQuery()
                        For Each w In orTerms
                            orQuery.Add(ParseTerm(parser, w), Occur.SHOULD)
                        Next
                        orQuery.MinimumNumberShouldMatch = 1
                        booleanQuery.Add(orQuery, Occur.SHOULD)
                        hasPositiveClause = True
                    End If

                    Dim notTerms = SplitTerms(notWords)
                    For Each w In notTerms
                        booleanQuery.Add(New BooleanClause(ParseTerm(parser, w), Occur.MUST_NOT))
                    Next

                    ' A BooleanQuery made only of MUST_NOT clauses matches nothing in Lucene;
                    ' give it "all documents" to subtract from.
                    If Not hasPositiveClause AndAlso notTerms.Length > 0 Then
                        booleanQuery.Add(New MatchAllDocsQuery(), Occur.MUST)
                    End If

                    Dim hits As TopDocs = searcher.Search(booleanQuery, maxResults)

                    For Each sd In hits.ScoreDocs
                        Dim doc = searcher.Doc(sd.Doc)
                        Dim row = dt.NewRow()
                        row("filename") = doc.Get("filename")
                        row("path") = doc.Get("path")
                        row("line") = doc.Get("line")
                        dt.Rows.Add(row)
                    Next
                End Using
            End Using
        End Using

        Return dt
    End Function

    ''' <summary>
    ''' Splits a whitespace-separated word list into its non-empty words
    ''' (an empty array for Nothing or blank input).
    ''' </summary>
    Private Shared Function SplitTerms(words As String) As String()
        If String.IsNullOrWhiteSpace(words) Then Return New String() {}
        ' RemoveEmptyEntries: repeated spaces would otherwise yield "" terms, which the parser rejects.
        Return words.Split(TermSeparators, StringSplitOptions.RemoveEmptyEntries)
    End Function

    ''' <summary>
    ''' Parses one search word. If the word is not valid query syntax (for example a stray "(" or ":"),
    ''' it is parsed again with all query-syntax characters escaped so it is treated as literal text.
    ''' A failure of the escaped parse is propagated to the caller.
    ''' </summary>
    Private Shared Function ParseTerm(parser As MultiFieldQueryParser, term As String) As LuceneQuery
        Try
            Return parser.Parse(term)
        Catch ex As Lucene.Net.QueryParsers.Classic.ParseException
            Return parser.Parse(Lucene.Net.QueryParsers.Classic.QueryParserBase.Escape(term))
        End Try
    End Function
End Class