Skip to content

Commit

Permalink
Allow user to pass scan probability multiplier, resolves #3
Browse files Browse the repository at this point in the history
  • Loading branch information
Maarten committed Jan 8, 2017
1 parent b47b3db commit 59d42b9
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 37 deletions.
16 changes: 11 additions & 5 deletions src/earley/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ export function getViterbiParseFromChart<S, T>(state: State<S, T>, chart: Chart<
}



export function parseSentenceIntoChart<S, T>(Start: NonTerminal,
grammar: Grammar<T, S>,
tokens: T[]): [Chart<T, S>, number, State<S, T>] {
tokens: T[],
scanProbability?: (x: T, t: Terminal<T>[]) => S): [Chart<T, S>, number, State<S, T>] {
// ScanProbability scanProbability//TODO

const stateSets: Chart<T, S> = new Chart(grammar);
Expand Down Expand Up @@ -125,7 +125,7 @@ export function parseSentenceIntoChart<S, T>(Start: NonTerminal,
tokensWithWords.forEach(
(token: WordWithTypes<T>) => {
predict(i, grammar, stateSets);
scan(i, token, grammar.probabilityMapping.semiring, stateSets);
scan(i, token, grammar.probabilityMapping.semiring, stateSets, scanProbability);
complete(i + 1, stateSets, grammar);

const completedStates: State<S, T>[] = [];
Expand Down Expand Up @@ -153,8 +153,13 @@ export interface ParseTreeWithScore<T> {

export function getViterbiParse<S, T>(Start: NonTerminal,
grammar: Grammar<T, S>,
tokens: T[]): ParseTreeWithScore<T> {
const [chart, ignored, init] = parseSentenceIntoChart(Start, grammar, tokens);
tokens: T[],
scanProbability?: (x: T, t: Terminal<T>[]) => S): ParseTreeWithScore<T> {
const [chart, ignored, init] = parseSentenceIntoChart(Start, grammar, tokens, scanProbability);

if (!chart.has(init.rule, tokens.length,
0,
init.rule.right.length)) throw new Error("Could not parse sentence.");

const finalState = chart.getOrCreate(
tokens.length,
Expand All @@ -163,6 +168,7 @@ export function getViterbiParse<S, T>(Start: NonTerminal,
init.rule
);


const parseTree: ParseTree<T> = getViterbiParseFromChart(finalState, chart);
const toProbability = grammar.probabilityMapping.toProbability;
const finalScore = chart.getViterbiScore(finalState).innerScore;
Expand Down
13 changes: 6 additions & 7 deletions src/earley/scan.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {isNonTerminal, WordWithTypes} from "../grammar/category";
import {isNonTerminal, WordWithTypes, Terminal} from "../grammar/category";
import {Semiring} from "semiring";
import {Chart} from "./chart/chart";
import {getActiveCategory, State, advanceDot} from "./chart/state";
Expand All @@ -10,19 +10,18 @@ import {getActiveCategory, State, advanceDot} from "./chart/state";
* @param tokenPosition The start index of the scan.
* @param word
* @param types
* //@param scanProbability Function that provides the probability of scanning the given token at this position. Might be null for a probability of 1.0.
* @param scanProbability Optional function that returns the probability of scanning the given word, given its terminal types. May be omitted (undefined) for a probability of 1.0.
* @param sr
* @param stateSets
*/
export function scan<S, T>(tokenPosition: number,
{word, types}: WordWithTypes<T>,
// scanProbability:(x:T)=>number,//TODO
sr: Semiring<S>,
stateSets: Chart<T, S>) {
stateSets: Chart<T, S>,
scanProbability?: (x: T, t: Terminal<T>[]) => S) {
const changes: any[] = [];
// TODO
// const scanProb:number = !scanProbability ? NaN : scanProbability(tokenPosition);
const scanProb: S = sr.multiplicativeIdentity;

const scanProb: S = !!scanProbability ? scanProbability(word, types) : undefined;

/*
* Get all states that are active on a terminal
Expand Down
67 changes: 43 additions & 24 deletions test/earley/parser.spec.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,31 @@
import {NonTerminal, Terminal, Category} from "../../src/grammar/category";
import {NonTerminal, Terminal} from "../../src/grammar/category";
import {getViterbiParse, ParseTreeWithScore, Grammar} from "../../src/index";

import * as Mocha from 'mocha'
import {expect} from 'chai';
import {scan} from "../../src/earley/scan";
import {LogSemiring} from "semiring";
import {Chart} from "../../src/earley/chart/chart";
import {expect} from "chai";
import {g, A} from "../sample-grammar";
import {parseSentenceIntoChart} from "../../src/earley/parser";

//TODO
describe('parser', () => {
// TODO
describe("parser", () => {


it('should complete correctly', () => {
it("should complete correctly", () => {
// complete(
// 0,
// "e",
// LogSemiring,
// ss
// )
});
it('should predict correctly', () => {
it("should predict correctly", () => {
// complete(
// 0,
// "e",
// LogSemiring,
// ss
// )
});
it('should parse the man chase the man with a stick', () => {
it("should parse the man chase the man with a stick", () => {
const S: NonTerminal = "S";
const NP: NonTerminal = "NP";
const VP: NonTerminal = "VP";
Expand All @@ -47,8 +43,8 @@ describe('parser', () => {
const stick: Terminal<string> = (token) => !!token.match(/stick/);
const with_: Terminal<string> = (token) => !!token.match(/with/);

const grammar: Grammar<string,number> = Grammar.builder("test")
//.setSemiring(new LogSemiring()) // If not set, defaults to Log semiring which is probably what you want
const grammar: Grammar<string, number> = Grammar.builder("test")
// .setSemiring(new LogSemiring()) // If not set, defaults to Log semiring which is probably what you want
.addNewRule(
1.0, // Probability between 0.0 and 1.0, defaults to 1.0. The builder takes care of converting it to the semiring element
S, // Left hand side of the rule
Expand Down Expand Up @@ -88,18 +84,45 @@ describe('parser', () => {
grammar,
tokens
);
//console.log(JSON.stringify(viterbi.parseTree)); // {"category":"<start>","children":[{"category":"S","children":[{"category":"NP","children":[{"category":"Det","children":[{"token":"The","children":[ ]}]},{"category":"N","children":[{"token":"man","children":[]}]}]},{"category":"VP","children":[{"category":"TV","children":[{"token":"chased","children":[]}]},{"category":"NP","children":[{"category":"Det","children":[{"token":"the","children":[]}]},{"category":"N","children":[{"token":"man","c hildren":[]}]},{"category":"Mod","children":[{"token":"with","children":[]},{"category":"NP","children":[{"category":"Det","children":[{"token":"a", "children":[]}]},{"category":"N","children":[{"token":"stick","children":[]}]}]}]}]}]}]}]}
//console.log(viterbi.probability); // 0.6
//Parser.recognize(S, grammar, Tokens.tokenize("the", "stick", "chased", "the", "man"))
// console.log(JSON.stringify(viterbi.parseTree)); // {"category":"<start>","children":[{"category":"S","children":[{"category":"NP","children":[{"category":"Det","children":[{"token":"The","children":[]}]},{"category":"N","children":[{"token":"man","children":[]}]}]},{"category":"VP","children":[{"category":"TV","children":[{"token":"chased","children":[]}]},{"category":"NP","children":[{"category":"Det","children":[{"token":"the","children":[]}]},{"category":"N","children":[{"token":"man","children":[]}]},{"category":"Mod","children":[{"token":"with","children":[]},{"category":"NP","children":[{"category":"Det","children":[{"token":"a","children":[]}]},{"category":"N","children":[{"token":"stick","children":[]}]}]}]}]}]}]}]}
// console.log(viterbi.probability); // 0.6
// Parser.recognize(S, grammar, Tokens.tokenize("the", "stick", "chased", "the", "man"))
});


it('should parse aaaaa', () => {
const tokens = ["a", "a", "a", "e"];
const [chart, i, init] = parseSentenceIntoChart(
const tokens = ["a", "a", "a", "e"];
it("should deal with scan probability correctly", () => {
const p1 = getViterbiParse(
A,
g,
tokens
tokens,
(ignore, ignored) => {
return g.probabilityMapping.fromProbability(1.0);
}
).probability;

const p2 = getViterbiParse(
A,
g,
tokens,
(word, ignored) => {
return word === "a" ? g.probabilityMapping.fromProbability(0.5) : undefined;
}
).probability;

const eq = p2 * 2 * 2 * 2;
const epsilon = 0.0000000000000001;
expect(p1).to.be.above(eq - epsilon).and.below(eq + epsilon);
});

it("should parse aaae", () => {
const [chart, ignored, init] = parseSentenceIntoChart(
A,
g,
tokens,
(word, terminalTypes) => {
return g.probabilityMapping.fromProbability(1.0);
}
);

expect(chart.getCompletedStates(tokens.length).has(
Expand All @@ -108,9 +131,5 @@ it('should parse aaaaa', () => {
)
)).to.equal(true);

/*console.log(g.probabilityMapping.toProbability(
chart.viterbiScores.get(chart.getOrCreate(
tokens.length, 0, init.rule.right.length, init.rule
)).innerScore));*/
});
});
2 changes: 1 addition & 1 deletion test/sample-grammar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export const g:Grammar<string, number> = builder
.addNewRule(0.5, C, [D])
.addNewRule(0.5, D, [E])
.addNewRule(0.5, D, [a])
.addNewRule(0.5, E, [E,E])
.addNewRule(0.5, E, [E, E])
.addNewRule(0.5, E, [e])
//.addRule(0.1, E, [C])
.build();
Expand Down

0 comments on commit 59d42b9

Please sign in to comment.