(** Signatures for search indexes over a single document type ({!Mono}) and
    over heterogeneous document types ({!Generic}). *)

module type Mono = sig
  type doc
  (** The type of documents we will search over. *)

  type uid
  (** The type of unique identifiers we will use to identify distinct
      documents. *)

  type t
  (** The search index. *)

  val index : t -> uid:uid -> token:string -> doc -> unit
  (** [index t ~uid ~token doc] indexes a given document [doc] in [t] for the
      token [token], with a unique identifier [uid]. *)

  val add_document : t -> uid -> doc -> unit
  (** Adds a new document to the indexer. *)

  val add_index : t -> (doc -> string) -> unit
  (** Adds a new index and re-indexes everything. *)

  val add_indexes : t -> (doc -> string) list -> unit
  (** Same as {!add_index} but allows you to add multiple indexes
      at a time before re-indexing occurs. *)

  val search : t -> string -> doc list
  (** [search t k] searches the index [t] using [k]. *)

  val empty :
    ?santiser:(string -> string) ->
    ?strategy:(string -> string list) ->
    ?tokeniser:(string -> string list) ->
    unit ->
    t
  (** Create a new empty search index.
      @param santiser Run on each token to normalise them, by default this is
        {!String.lowercase_ascii}. Note that the label is spelled [santiser].
      @param strategy The indexing strategy, by default this is a prefixing
        strategy such that [abc] is indexed with [a], [ab] and [abc]
      @param tokeniser Turns your documents into tokens. *)

  val pp : Format.formatter -> t -> unit
  (** Dumps the index, mainly for debugging or testing. *)
end

(** Unique identifiers: an ordered type (see [Map.OrderedType]) whose values
    can also be converted to strings. *)
module type Uid = sig
  type t

  include Map.OrderedType with type t := t

  val to_string : t -> string
  (** Converts an identifier to a string. *)
end

module type Generic = sig
  type t
  (** The search index. *)

  type key
  (** The type of keys passed to {!add_document}. *)

  (** Type witnesses, built on an extensible variant. *)
  module Witness : sig
    type _ witness = ..

    module type Tid = sig
      type t
      type _ witness += Tid : t witness
    end

    type 'a t = (module Tid with type t = 'a)
  end

  (** Typed identifiers: a ['v witness] carries the document type ['v], and
      can be turned into an untyped {!Uid.t} with {!Uid.hide_type}. *)
  module Uid : sig
    type 'v witness

    val create : unit -> 'v witness
    val tid : 'v witness -> 'v Witness.t

    type t

    val hide_type : 'v witness -> t
    val equal : t -> t -> bool
    val compare : t -> t -> int
  end

  type 'v uid = 'v Uid.witness
  (** A value of type ['v uid] can be used to uniquely identify documents of
      type ['v]. *)

  type binding =
    | KV : ('v uid * 'v) -> binding
        (** A [binding] is returned when searching in a heterogeneous search
            index. *)

  type doc = binding
  (** Documents are bindings. *)

  val index : t -> uid:'doc uid -> token:string -> doc -> unit
  (** [index t ~uid ~token doc] indexes a given document [doc] in [t] for the
      token [token], with a unique identifier [uid]. *)

  val add_document : t -> 'doc uid -> key -> 'doc -> unit
  (** Adds a new document to the indexer. *)

  val apply : 'v uid -> default:'a -> ('v -> 'a) -> doc -> 'a
  (** [apply uid ~default fn doc] runs the function [fn] on [doc]
      if [uid] identifies the types as being the same, otherwise
      it returns [default]. *)

  val apply_exn : 'v uid -> ('v -> 'a) -> doc -> 'a
  (** Like {!apply} except without a default return value so it may raise
      [Invalid_argument _]. *)

  val add_index : t -> 'doc uid -> ('doc -> string) -> unit
  (** Adds a new index and re-indexes everything. *)

  val add_indexes : t -> 'doc uid -> ('doc -> string) list -> unit
  (** Same as {!add_index} but allows you to add multiple indexes
      at a time before re-indexing occurs. *)

  val search : t -> string -> binding list
  (** [search t k] searches the index [t] using [k] returning the possible
      bindings. *)

  val empty :
    ?santiser:(string -> string) ->
    ?strategy:(string -> string list) ->
    ?tokeniser:(string -> string list) ->
    unit ->
    t
  (** Create a new empty search index.
      @param santiser Run on each token to normalise them, by default this is
        {!String.lowercase_ascii}. Note that the label is spelled [santiser].
      @param strategy The indexing strategy, by default this is a prefixing
        strategy such that [abc] is indexed with [a], [ab] and [abc]
      @param tokeniser Turns your documents into tokens. *)

  val pp : Format.formatter -> t -> unit
  (** Dumps the search index. *)
end

(** A {!Mono} implementation packed together with one of its index values;
    the concrete index type is hidden. *)
type ('uid, 'doc) mono_index =
  | Mono :
      (module Mono with type t = 'index and type doc = 'doc and type uid = 'uid)
      * 'index
      -> ('uid, 'doc) mono_index

(** A {!Generic} implementation packed together with one of its index values;
    the concrete index type is hidden. *)
type 'key generic_index =
  | General :
      (module Generic with type t = 'index and type key = 'key) * 'index
      -> 'key generic_index

module type Sigs = sig
  (** {1 Unique Identifiers} *)

  module type Uid = Uid

  (** {1 Search Index Implementations} *)

  (** {2 Term Frequency Inverse Document Frequency}
      This search index uses the {{: https://en.wikipedia.org/wiki/Tf–idf} tf-idf}
      approach to searching. *)

  module Tfidf : sig
    (** A functor for building a Tfidf search index over one type of document. *)
    module Mono (Uid : Uid) (Doc : sig
      type t
    end) : Mono with type doc = Doc.t and type uid = Uid.t

    (** A functor for building a Tfidf search index over different types of document. *)
    module Generic (Uid : Uid) : Generic with type key = Uid.t
  end

  (** {1 Useful UID implementations} *)

  val create_uid :
    to_string:('uid -> string) ->
    cmp:('uid -> 'uid -> int) ->
    (module Uid with type t = 'uid)
  (** [create_uid ~to_string ~cmp] allows you to create a {!Uid} module
      from the provided functions. *)

  (** Ready-made {!Uid} modules for [string] and [int] identifiers. *)
  module Uids : sig
    module String : Uid with type t = string
    module Int : Uid with type t = int
  end

  (** Re-exports the [Witness] module. *)
  module Private : sig
    module Witness = Witness
  end
end