public abstract class BaseSRMetric extends Object implements SRMetric
Modifier and Type | Class and Description |
---|---|
static class |
BaseSRMetric.SRConfig
Returns properties about the metric.
|
Constructor and Description |
---|
BaseSRMetric(String name,
Language language,
LocalPageDao dao,
Disambiguator disambig) |
Modifier and Type | Method and Description |
---|---|
void |
clearMostSimilarCache() |
protected static void |
configureBase(Configurator configurator,
BaseSRMetric sr,
com.typesafe.config.Config config) |
double[][] |
cosimilarity(int[] ids)
Construct symmetric cosimilarity matrix of Wikipedia ids in a given language.
|
double[][] |
cosimilarity(int[] wpRowIds,
int[] wpColIds)
Construct a cosimilarity matrix of Wikipedia ids in a given language.
|
double[][] |
cosimilarity(String[] phrases)
Construct symmetric cosimilarity matrix of phrases by mapping through local pages.
|
double[][] |
cosimilarity(String[] rowPhrases,
String[] colPhrases)
Construct a cosimilarity matrix of phrases.
|
protected void |
ensureMostSimilarTrained()
Throws an IllegalStateException if the model has not been mostSimilarTrained.
|
protected void |
ensureSimilarityTrained()
Throws an IllegalStateException if the model has not been similarityTrained.
|
protected SRResultList |
getCachedMostSimilar(int wpId,
int numResults,
gnu.trove.set.TIntSet validIds)
If the cache exists, and contains at least numResults valid ids for the requested id, return it.
|
abstract BaseSRMetric.SRConfig |
getConfig() |
File |
getDataDir()
Returns the directory containing all data for the metric.
|
Disambiguator |
getDisambiguator() |
Language |
getLanguage() |
LocalPageDao |
getLocalPageDao() |
org.wikibrain.matrix.SparseMatrix |
getMostSimilarCache() |
protected File |
getMostSimilarMatrixPath() |
Normalizer |
getMostSimilarNormalizer() |
String |
getName() |
Normalizer |
getSimilarityNormalizer() |
SRResultList |
mostSimilar(int pageId,
int maxResults)
Find the most similar local pages to a local page within the same language.
|
abstract SRResultList |
mostSimilar(int pageId,
int maxResults,
gnu.trove.set.TIntSet validIds)
Find the most similar local pages to a local page.
|
SRResultList |
mostSimilar(String phrase,
int maxResults)
Find the most similar local pages to a phrase.
|
SRResultList |
mostSimilar(String phrase,
int maxResults,
gnu.trove.set.TIntSet validIds)
Find the most similar local pages to a phrase.
|
boolean |
mostSimilarIsTrained() |
protected double |
normalize(double score) |
protected SRResult |
normalize(SRResult sr)
Use the language-specific similarity normalizer to normalize a similarity if it exists.
|
protected SRResultList |
normalize(SRResultList srl)
Use the language-specific most similar normalizer to normalize a similarity if it exists.
|
void |
read()
Reads the metric from the current data directory.
|
void |
setBuildMostSimilarCache(boolean buildMostSimilarCache) |
void |
setDataDir(File dir)
Sets the data directory associated with the model.
|
void |
setMostSimilarCacheRowIds(gnu.trove.set.TIntSet rowIds) |
void |
setMostSimilarNormalizer(Normalizer n)
Sets the most similar normalizer.
|
void |
setReadNormalizers(boolean shouldRead) |
void |
setSimilarityNormalizer(Normalizer n)
Sets the similarity normalizer.
|
abstract SRResult |
similarity(int pageId1,
int pageId2,
boolean explanations)
Determine the similarity between two local pages.
|
SRResult |
similarity(String phrase1,
String phrase2,
boolean explanations)
Determine the similarity between two strings in a given language by mapping through local pages.
|
boolean |
similarityIsTrained() |
void |
trainMostSimilar(Dataset dataset,
int numResults,
gnu.trove.set.TIntSet validIds)
Train the mostSimilar() function.
The KnownSims may already be associated with Wikipedia ids (check wpId1 and wpId2).
|
void |
trainSimilarity(Dataset dataset)
Train the similarity() function.
|
void |
write()
Writes the metric to the current data directory.
|
void |
writeMostSimilarCache(int maxHits) |
void |
writeMostSimilarCache(int maxHits,
gnu.trove.set.TIntSet rowIds,
gnu.trove.set.TIntSet colIds)
Creates and writes a sparse matrix that records the top-k results for every page.
|
public BaseSRMetric(String name, Language language, LocalPageDao dao, Disambiguator disambig)
public abstract BaseSRMetric.SRConfig getConfig()
public File getDataDir()
SRMetric
getDataDir
in interface SRMetric
public String getName()
public void setDataDir(File dir)
SRMetric
setDataDir
in interface SRMetric
public void setMostSimilarNormalizer(Normalizer n)
SRMetric
setMostSimilarNormalizer
in interface SRMetric
public void setSimilarityNormalizer(Normalizer n)
SRMetric
setSimilarityNormalizer
in interface SRMetric
public boolean similarityIsTrained()
similarityIsTrained
in interface SRMetric
public boolean mostSimilarIsTrained()
mostSimilarIsTrained
in interface SRMetric
protected void ensureSimilarityTrained()
protected void ensureMostSimilarTrained()
protected SRResult normalize(SRResult sr)
sr
-
protected SRResultList normalize(SRResultList srl)
srl
-
protected double normalize(double score)
public void write() throws IOException
SRMetric
write
in interface SRMetric
IOException
public void setReadNormalizers(boolean shouldRead)
public void read() throws IOException
SRMetric
read
in interface SRMetric
IOException
public void trainSimilarity(Dataset dataset) throws DaoException
SRMetric
trainSimilarity
in interface SRMetric
dataset
- A gold standard dataset.
DaoException
public void trainMostSimilar(Dataset dataset, int numResults, gnu.trove.set.TIntSet validIds)
SRMetric
trainMostSimilar
in interface SRMetric
dataset
- A gold standard dataset.
numResults
- The maximum number of similar articles computed per phrase.
validIds
- The Wikipedia ids that should be considered in result sets. Null means all ids.
public abstract SRResult similarity(int pageId1, int pageId2, boolean explanations) throws DaoException
SRMetric
similarity
in interface SRMetric
pageId1
- Id of the first page.
pageId2
- Id of the second page.
explanations
- Whether explanations should be created.
DaoException
public SRResult similarity(String phrase1, String phrase2, boolean explanations) throws DaoException
SRMetric
similarity
in interface SRMetric
phrase1
- The first phrase.
phrase2
- The second phrase.
explanations
- Whether explanations should be created.
DaoException
public SRResultList mostSimilar(int pageId, int maxResults) throws DaoException
SRMetric
mostSimilar
in interface SRMetric
pageId
- The id of the local page whose similarity we are examining.
maxResults
- The maximum number of results to return.
DaoException
public abstract SRResultList mostSimilar(int pageId, int maxResults, gnu.trove.set.TIntSet validIds) throws DaoException
SRMetric
mostSimilar
in interface SRMetric
pageId
- The id of the local page whose similarity we are examining.
maxResults
- The maximum number of results to return.
validIds
- The local page ids to be considered. Null means all ids in the language.
DaoException
public SRResultList mostSimilar(String phrase, int maxResults) throws DaoException
SRMetric
mostSimilar
in interface SRMetric
phrase
- The phrase whose similarity we are examining.
maxResults
- The maximum number of results to return.
DaoException
public SRResultList mostSimilar(String phrase, int maxResults, gnu.trove.set.TIntSet validIds) throws DaoException
SRMetric
mostSimilar
in interface SRMetric
phrase
- The phrase whose similarity we are examining.
maxResults
- The maximum number of results to return.
validIds
- The local page ids to be considered. Null means all ids in the language.
DaoException
public double[][] cosimilarity(int[] wpRowIds, int[] wpColIds) throws DaoException
SRMetric
cosimilarity
in interface SRMetric
DaoException
public double[][] cosimilarity(String[] rowPhrases, String[] colPhrases) throws DaoException
SRMetric
cosimilarity
in interface SRMetric
DaoException
public double[][] cosimilarity(int[] ids) throws DaoException
SRMetric
cosimilarity
in interface SRMetric
DaoException
public double[][] cosimilarity(String[] phrases) throws DaoException
SRMetric
cosimilarity
in interface SRMetric
DaoException
protected SRResultList getCachedMostSimilar(int wpId, int numResults, gnu.trove.set.TIntSet validIds) throws DaoException
wpId
-
numResults
-
validIds
-
DaoException
public void writeMostSimilarCache(int maxHits) throws IOException, DaoException, WikiBrainException
public void writeMostSimilarCache(int maxHits, gnu.trove.set.TIntSet rowIds, gnu.trove.set.TIntSet colIds) throws IOException, DaoException, WikiBrainException
maxHits
-
rowIds
-
colIds
-
IOException
DaoException
WikiBrainException
protected File getMostSimilarMatrixPath()
public Language getLanguage()
getLanguage
in interface SRMetric
public Disambiguator getDisambiguator()
public LocalPageDao getLocalPageDao()
public Normalizer getMostSimilarNormalizer()
getMostSimilarNormalizer
in interface SRMetric
public Normalizer getSimilarityNormalizer()
getSimilarityNormalizer
in interface SRMetric
public org.wikibrain.matrix.SparseMatrix getMostSimilarCache()
public void clearMostSimilarCache()
public void setBuildMostSimilarCache(boolean buildMostSimilarCache)
public void setMostSimilarCacheRowIds(gnu.trove.set.TIntSet rowIds)
protected static void configureBase(Configurator configurator, BaseSRMetric sr, com.typesafe.config.Config config) throws ConfigurationException
ConfigurationException
Copyright © 2014. All rights reserved.