sphinxsearchWhat are some alternatives to SphinxSearch for software development?

Apache Lucene: Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java. It is a technology suitable for nearly any application that requires full-text search, especially cross-platform. Example code:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;

public class Searcher {

  public static void search(String indexDir, String q) throws Exception {
    //Get directory reference
    Directory dir = FSDirectory.open(Paths.get(indexDir));

    //Index reader - an interface for accessing a point-in-time view of a lucene index
    IndexReader reader = DirectoryReader.open(dir);

    //Create lucene searcher. It search over a single IndexReader.
    IndexSearcher searcher = new IndexSearcher(reader);

    //Analyzer with the default stop words
    Analyzer analyzer = new StandardAnalyzer();

    //Query parser to be used for creating TermQuery
    QueryParser parser = new QueryParser(fieldName, analyzer);
    Query query = parser.parse(q);
    System.out.println("Searching for: " + query.toString(fieldName));

    //Collects the top ten hits for the query
    TopScoreDocCollector collector = TopScoreDocCollector.create(10);

    //search the index
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    //Examine the Hits object to see if there were any matches
    int numTotalHits = collector.getTotalHits();
    System.out.println(numTotalHits + " total matching documents");

    //Iterate over the documents in the hits
    for (int i = 0; i < numTotalHits; i++) {
      Document doc = searcher.doc(hits[i].doc);
      String path = doc.get("path");
      if (path != null) {
        System.out.println((i + 1) + ". " + path);
        String title = doc.get("title");
        if (title != null) {
          System.out.println("   Title: " + doc.get("title"));
        }
      } else {
        System.out.println((i + 1) + ". " + "No path for this document");
      }
    }
  }
}

Elasticsearch: Elasticsearch is a distributed, open source search and analytics engine for all types of data, including textual, numerical, geospatial, structured, and unstructured. Example code:

from elasticsearch import Elasticsearch

# Connect to the elastic cluster
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

# Search for documents
res = es.search(index="my_index", body={"query": {"match": {"title": "Elasticsearch"}}})

# Print the results
print("Got %d Hits:" % res['hits']['total'])
for hit in res['hits']['hits']:
    print("%(title)s %(author)s: %(text)s" % hit["_source"])

Solr: Apache Solr is a popular, blazing-fast, open source enterprise search platform from the Apache Lucene project. It’s designed for scalability and fault tolerance, and provides distributed indexing, replication, and load-balanced querying, automated failover and recovery, centralized configuration and more. Example code:

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

public class SolrjExample {

  public static void main(String[] args) throws SolrServerException {
    //Create Solr client
    String solrUrl = "http://localhost:8983/solr/gettingstarted";
    HttpSolrClient solrClient = new HttpSolrClient.Builder(solrUrl).build();

    //Create query
    SolrQuery query = new SolrQuery();
    query.setQuery("*:*");
    query.setStart(0);
    query.setRows(10);

    //Execute query
    QueryResponse response = solrClient.query(query);
    SolrDocumentList documents = response.getResults();

    //Iterate over the results
    for (SolrDocument document : documents) {
      System.out.println(document.getFieldValue("id"));
      System.out.println(document.getFieldValue("name"));
      System.out.println(document.getFieldValue("price"));
    }
  }
}

Xapian: Xapian is an open source search engine library written in C++. It supports full text search, boolean search, relevance feedback, phrase search, term-expansion, and other advanced features. Example code:

#include <xapian.h>
#include <iostream>
#include <string>

int main() {
    // Create or open the database
    Xapian::WritableDatabase db("/var/lib/xapian/data/", Xapian::DB_CREATE_OR_OPEN);

    // Create the document
    Xapian::Document doc;
    doc.set_data("Hello, world!");
    doc.add_term("Hello");
    doc.add_term("World");

    // Add the document to the database
    db.add_document(doc);

    // Commit the changes
    db.commit();

    // Perform a search
    Xapian::Query query("Hello");
    Xapian::Enquire enquire(db);
    enquire.set_query(query);
    Xapian::MSet matches = enquire.get_mset(0, 10);

    // Output the results
    std::cout << matches.get_matches_estimated() << " results found." << std::endl;
    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
        Xapian::Document doc = i.get_document();
        std::cout << "Document ID " << *i << ": " << doc.get_data() << std::endl;
    }

    return 0;
}

Terrier: Terrier is a powerful, high performance open source search engine written in Java. It supports distributed search, query expansion, relevance feedback, and other advanced features. Example code:

import org.terrier.querying.Manager;
import org.terrier.querying.SearchRequest;
import org.terrier.structures.Index;

public class TerrierExample {

  public static void main(String[] args) throws Exception {
    //Create Terrier index
    Index index = Index.createIndex("/var/lib/terrier/index", "data");

    //Create Terrier manager
    Manager manager = new Manager(index);

    //Create search request
    SearchRequest srq = manager.newSearchRequest("query1", "Hello world");
    srq.setControl("c", "1");

    //Run search request
    manager.runPreProcessing(srq);
    manager.runMatching(srq);
    manager.runPostProcessing(srq);
    manager.runPostFilters(srq);

    //Print results
    System.out.println("Found " + srq.getResultSet().getResultSize() + " results");
    for (int i = 0; i < srq.getResultSet().getResultSize(); i++) {
      System.out.println(srq.getResultSet().getDocids()[i] + " " + srq.getResultSet().getScores()[i]);
    }
  }
}

Whoosh: Whoosh is a fast, feature-rich full-text indexing and searching library implemented in pure Python. It supports fielded indexing, fast wildcard and prefix searches, phrase and proximity searches, fielded sorting, and more. Example code:

from whoosh.index import create_in
from whoosh.fields import *

schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
ix = create_in("indexdir", schema)
writer = ix.writer()
writer.add_document(title=u"First document", path=u"/a",
                    content=u"This is the first document we've added!")
writer.add_document(title=u"Second document", path=u"/b",
                    content=u"The second one is even more interesting!")
writer.commit()

from whoosh.qparser import QueryParser
with ix.searcher() as searcher:
    query = QueryParser("content", ix.schema).parse("first")
    results = searcher.search(query)
    print(results[0])

Zettair: Zettair is a lightweight, full-text search engine written in C. It supports boolean queries, phrase queries, wildcard queries, relevance ranking, and more. Example code:

#include <stdio.h>
#include <zettair.h>

int main(int argc, char **argv) {
    /* open the index */
    zettair_t *z;
    if (zettair_open(&z, "index", 0) != ZETTAIR_OK) {
        fprintf(stderr, "error: could not open index\n");
        return 1;
    }

    /* create a query object */
    query_t *q;
    if (query_create(&q, z) != QUERY_OK) {
        fprintf(stderr, "error: could not create query object\n");
        return 1;
    }

    /* parse a query */
    if (query_parse(q, "hello world") != QUERY_OK) {
        fprintf(stderr, "error: could not parse query\n");
        return 1;
    }

    /* execute the query */
    unsigned int *matches;
    unsigned int matches_len;
    if (query_execute(q, &matches, &matches_len) != QUERY_OK) {
        fprintf(stderr, "error: could not execute query\n");
        return 1;
    }

    /* print the results */
    printf("%u matches:\n", matches_len);
    unsigned int i;
    for (i = 0; i < matches_len; i++) {
        printf("  %u\n", matches[i]);
    }

    /* clean up */
    query_destroy(q);
    zettair_close(z);
    return 0;
}

These are some of the alternatives to SphinxSearch for software development. Each of these search engines has its own advantages and disadvantages, so it is important to choose the one that best suits the needs of the project.

Edit this code on GitHub

More of Sphinxsearch

See more codes...