1
Wish list / Re: What about using a special language for NPC interaction ?
« on: September 13, 2006, 01:30:23 pm »
Code for MarkovDict.pas file :
------------------------------------------------------------------------------------------
// Copyright (C) 2006 Jorge Aldo G. de F. Junior.
// This file received some modifications from FPCBot Authors
// This file is part of Pascal Markov Text Generation Library (Called PMFGL).
// PMFGL is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
// PMFGL is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with PMFGL; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Unit MarkovDict;
{$mode objfpc}{$h+}
Interface
Uses
Classes, SysUtils;
Const
ccStartToken = '<<';
ccEndToken = '>>';
ccImpulse = 10;
Type
{ TMarkovDict }
TMarkovDict = Class
Private
fName : String;
fWords : TStringList;
function GetCount: Integer;
function GetMarkovCount: Integer;
function GetTransitions(index: integer): TStringList;
Public
Constructor Create(Arq : String);
Destructor Destroy; Override;
Procedure Load;
Procedure Flush;
Function InsertWord(St : AnsiString): Integer;
Function FindWord(St : AnsiString): Integer;
Procedure ImpulsePair(S1, S2 : AnsiString);
Property Words : TStringList Read fWords Write fWords;
Property Transitions[Index : Integer]: TStringList Read GetTransitions;
Property Count : Integer Read GetCount;
Property MarkovCount : Integer Read GetMarkovCount;
End;
Implementation
Function TMarkovDict.GetCount: Integer;
Begin
Result := fWords.Count;
End;
Function TMarkovDict.GetMarkovCount: Integer;
Var
Ctrl : Integer;
TransitionList : TStringList;
Begin
Result := 0;
For Ctrl := 0 To fWords.Count-1 Do
Begin
TransitionList := Transitions[Ctrl];
If Assigned(TransitionList) Then
Result := Result + TransitionList.Count;
End;
End;
Function TMarkovDict.GetTransitions(Index: Integer): TStringList;
Begin
Result := TStringList(fWords.Objects[Index]);
End;
Constructor TMarkovDict.Create(Arq : String);
Begin
Inherited Create;
fName := Arq;
fWords := TStringList.Create;
fWords.Sorted := true;
fWords.Duplicates := dupIgnore;
End;
Destructor TMarkovDict.Destroy;
Var
Ctrl : Integer;
Begin
fName := '';
For Ctrl := 0 To fWords.Count -1 Do
If Assigned(Transitions[Ctrl]) Then
Transitions[Ctrl].Free;
fWords.Free;
Inherited Destroy;
End;
// Loads the list from a file
Procedure TMarkovDict.Load;
Var
Handler : TextFile;
Word : String;
Line : String;
NewTransitions : TStringList;
HitCount : PtrInt;
Begin
If Not(FileExists(fName)) Then
Exit;
AssignFile(Handler, fName);
Reset(Handler);
While Not(Eof(Handler)) Do
Begin
ReadLn(Handler, Word);
ReadLn(Handler, Line);
If Length(Line) > 0 Then
Begin
NewTransitions := TStringList.Create;
Repeat
Readln(Handler, HitCount);
NewTransitions.AddObject(Line, TObject(HitCount));
ReadLn(Handler, line);
Until Length(Line) = 0;
End
Else
NewTransitions := Nil;
FWords.AddObject(Word, NewTransitions);
End;
CloseFile(Handler);
End;
// Saves the list to a file
Procedure TMarkovDict.Flush;
Var
W1,
W2 : Integer;
MarkovText : TextFile;
Transition : TStringList;
Begin
Assign(MarkovText, fName);
ReWrite(MarkovText);
For W1 := 0 to fWords.Count-1 Do
Begin
WriteLn(MarkovText, fWords[W1]);
Transition:=Transitions[W1];
If Assigned(Transition) Then
Begin
For W2 := 0 to Transition.Count -1 Do
Begin
WriteLn(MarkovText, Transition[W2]);
WriteLn(MarkovText, PtrInt(Transition.Objects[W2]));
End;
End;
WriteLn(MarkovText);
End;
CloseFile(MarkovText);
End;
// adds a new word to the list returning his value or
// returns the value of a word if it is already in the list
Function TMarkovDict.InsertWord(St : AnsiString): Integer;
Begin
Result := FWords.Add(St);
End;
// Finds a word in the list, returning -1 if the word
// isn't already there
Function TMarkovDict.FindWord(St : AnsiString): Integer;
Begin
FindWord := fWords.IndexOf(St);
End;
Procedure TMarkovDict.ImpulsePair(S1, S2: AnsiString);
Var
W1,
W2 : Integer;
TransitionList : TStringList;
Begin
W1 := FWords.IndexOf(S1);
If W1 < 0 Then
W1 := FWords.Add(S1);
TransitionList := Transitions[W1];
If Not(Assigned(TransitionList)) Then
Begin
TransitionList := TStringList.Create;
FWords.Objects[W1] := TransitionList;
End;
W2 := TransitionList.IndexOf(S2);
If W2 < 0 Then
W2 := TransitionList.Add(S2);
TransitionList.Objects[w2] := TObject(ptrint(TransitionList.Objects[w2]) + ccImpulse);
End;
End.
------------------------------------------------------------------------------------------
Code for Markov.pas file :
------------------------------------------------------------------------------------------
// Copyright (C) 2006 Jorge Aldo G. de F. Junior.
// This file received some modifications from FPCBot Authors
// This file is part of Pascal Markov Text Generation Library (Called PMFGL).
// PMFGL is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
// PMFGL is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with PMFGL; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Unit Markov;
{$mode objfpc}{$h+}
Interface
Uses MarkovDict, Classes, SysUtils;
Type
// Holds the last words used by the engine in order to
// avoid recursions
TLastWordsUsed = Class
Private
fBuffer : Array Of String;
fPos : Integer;
Public
Constructor Create(Size : Cardinal);
Destructor Destroy; Override;
Procedure WordSaid(W : String);
Function WordUsed(W : String): Integer;
Procedure Show;
End;
// A generic markov text generator
{ TMarkov }
TMarkov = Class
Private
fErrorMargin : Byte; // Deviation
fThreshold : Byte; // Limiar level for a construction to be considered "right"
fDict : TMarkovDict;
fUsed : TLastWordsUsed;
fUserUsed : TLastWordsUsed;
Function StartCode : Integer;
Function EndCode : Integer;
// Number of words in the dictionary
Function WordsCount : Integer;
// Number of entries in the markov chain
Function MarkovCount : Integer;
// A random word to follow K, with at least E% usage
Function RandomWord(const S1: string; E : Integer): string;
Public
// D = Markov data file
// M = Deviation
// T = Threshold
Constructor Create(D : String; M, T : Byte);
Destructor Destroy; Override;
// Use talkto when you need to write a new phrase into the database
// without generating a response (I.E. Write-Only)
Procedure TalkTo(W : TStringList);
// Use talkfrom when you want to generate a new phrase from the
// current database
Function TalkFrom: AnsiString;
// Use talk to write to and generate a response at the same time
Function Talk(W : TStringList): AnsiString;
// Change a word in the dictionary, correcting a mispelling ?
// S1 is the current word, S2 is the word to wich it should be changed
Procedure Correct(S1, S2 : AnsiString);
// Error margin is the max deviation from a central threshold level
Property ErrorMargin : Byte Read fErrorMargin Write fErrorMargin;
// Threshold is the level where a construction is marked as good
// language one
Property Threshold : Byte Read fThreshold Write fThreshold;
// The ammount of words in the dictionary
Property DictionaryWords : Integer Read WordsCount;
// The ammount of words in the markov chain
Property EntriesMarkov : Integer Read MarkovCount;
End;
Implementation
// Generates (guaranteed) a random number wich value in the [X, Y] Range
Function RandomFromTo(X, Y : Integer): Integer;
Var
Tmp : Integer;
Begin
Repeat
Tmp := X + Random(Y - X);
Until ((Tmp >= X) and (Tmp <= Y));
RandomFromTo := Tmp;
End;
// TMarkov
Constructor TMarkov.Create(D : String; M, T : Byte);
Begin
Inherited Create;
fDict := TMarkovDict.Create(D);
fDict.Load;
// fTable := TMarkovTable.Create(P);
// fTable.Load;
fUsed := TLastWordsUsed.Create(128);
fUserUsed := TLastWordsUsed.Create(64);
fErrorMargin := M;
fThreshold := T;
// make sure the startToken and EndToken are part of it.
fDict.InsertWord(ccStartToken);
fDict.InsertWord(ccEndToken);
WriteLn('Phrase start code : ', StartCode);
WriteLn('Phrase end code : ', EndCode);
End;
Destructor TMarkov.Destroy;
Begin
fDict.Flush;
fDict.Free;
fUsed.Free;
fUserUsed.Free;
Inherited Destroy;
End;
Procedure TMarkov.TalkTo(W : TStringList);
Var
Ctrl : Integer;
Begin
If Assigned(W) And (W.Count > 0) Then
Begin
// Puts every word in the dictionary if not already
// and mark as context input
For Ctrl := 0 To W.Count - 1 Do
Begin
fDict.InsertWord(W[Ctrl]);
fUserUsed.WordSaid(W[Ctrl]);
End;
// Reinforce start word behaviour
fDict.ImpulsePair(ccStartToken, W[0]);
If W.Count > 1 Then
// Insert/reinforce word pairs in the transition table
For Ctrl := 0 To W.Count - 2 Do
fDict.ImpulsePair(W[Ctrl], W[Ctrl + 1]);
// Reinforce end word behaviour
fDict.ImpulsePair(W[W.Count - 1], ccEndToken);
End;
End;
function TMarkov.StartCode: Integer;
begin
Result := fDict.FindWord(ccStartToken);
end;
function TMarkov.EndCode: Integer;
begin
Result := fDict.FindWord(ccEndToken);
end;
// Number of words in the dictionary
Function TMarkov.WordsCount : Integer;
Begin
WordsCount := fDict.Count;
End;
// Number of entries in the markov chain
Function TMarkov.MarkovCount : Integer;
Begin
MarkovCount := fDict.MarkovCount;
End;
Function TMarkov.RandomWord(Const S1 : String; E : Integer): string;
Var
Ctrl : Integer;
Highest : Integer;
Partial : Array Of Integer;
Possible : Array Of Boolean;
Coeff : Double;
Possibles : Integer;
Chosen : Int64;
Transitions : TStringList;
Begin
Result := ccEndToken;
Transitions := FDict.Transitions[FDict.FindWord(S1)];
If (Transitions = Nil) Or (Transitions.Count = 0) Then
Exit;
Highest := 0;
Possibles := 0;
SetLength(Partial, Transitions.Count);
SetLength(Possible, Transitions.Count);
Write(' Transition matrix with ', Transitions.Count ,' coefficients : ');
// Generates the transition matrix for K
// from K to each possible word
For Ctrl := 0 to Transitions.Count - 1 Do
Begin
// Coefficient : four words in the context cancels out a word being too much used
Coeff := (fUserUsed.WordUsed(Transitions[Ctrl]) + 1) / ((fUsed.WordUsed(Transitions[Ctrl]) + 1) * 4);
Partial[Ctrl] := Round(Ptrint(Transitions.Objects[Ctrl]) * Coeff * 100);
If Ctrl = EndCode Then
Partial[Ctrl] := Round(Partial[Ctrl] * 0.75);
If Partial[Ctrl] > Highest Then
Highest := Partial[Ctrl];
End;
WriteLn(Highest, ' highest.');
// Transforms from "hits" into percentual (%) values
// relative to the highest scoring word in the markov transition table
// If a value is higher than the threshold (E) then this is a "possible"
// word to follow K in a normal text
Write(' Apply thresholds : ');
For Ctrl := 0 to Transitions.Count - 1 Do
Begin
If Partial[Ctrl] > 0 Then
Possible[Ctrl] := Round((Partial[Ctrl]) / (Highest / 100)) > E
Else
Possible[Ctrl] := False;
If Possible[Ctrl] Then
Inc(Possibles);
End;
WriteLn(Possibles, ' words can follow.');
If Possibles = 0 Then
Exit;
// Choose a random from the possible words
Chosen := Random(Possibles); // number to skip
Write(' Select keyword : ');
Ctrl := 0;
While (Chosen >= Ord(Possible[Ctrl])) Do
Begin
If Possible[Ctrl] Then
Dec(Chosen);
Inc(Ctrl);
end;
Result := Transitions[Ctrl];
WriteLn(ctrl, ' ', Result, ' selected.');
End;
Function TMarkov.TalkFrom: AnsiString;
Var
Temp : AnsiString;
Key : Integer;
AWord : AnsiString;
Begin
// Start a fresh phrase
WriteLn(' TalkFrom');
WriteLn(' {');
AWord := ccStartToken;
Temp := '';
Repeat
AWord := RandomWord(AWord, RandomFromTo(fThreshold - fErrorMargin, fThreshold + fErrorMargin));
If AWord <> ccEndToken Then
Begin
// Append the next word in the phrase
Temp := Temp + AWord + ' ';
fUsed.WordSaid(AWord);
End;
// The next word will be the most probable from the current one
Until AWord = ccEndToken;
WriteLn(' }');
TalkFrom := Temp;
End;
Function TMarkov.Talk(W : TStringList): AnsiString;
Var
Temp : Ansistring;
Ctrl : Integer;
Ctrl2 : Integer;
Begin
Temp := '';
Ctrl2 := 0;
If Assigned(W) Then
Begin
TalkTo(W);
WriteLn;
WriteLn('Generating text');
WriteLn('{');
WriteLn(' Input words : ');
WriteLn(' {');
Write(' ');
For Ctrl := 0 To W.Count - 1 Do
Write(W[Ctrl], ' ');
WriteLn;
WriteLn(' }');
WriteLn(' Context box');
WriteLn(' {');
Write(' ');
fUserUsed.Show;
WriteLn(' }');
WriteLn(' Used words');
WriteLn(' {');
Write(' ');
fUsed.Show;
WriteLn(' }');
WriteLn(' Iterating');
WriteLn(' {');
While (Temp = '') And (Ctrl2 <= 1024) Do
Begin
Temp := TalkFrom;
Inc(Ctrl2);
WriteLn(' Try : ', Ctrl2);
End;
WriteLn(' }');
WriteLn(' Result = ', Temp);
WriteLn('}');
WriteLn;
End;
Talk := Temp;
End;
// Change a word in the dictionary, correcting a mispelling ?
// S1 is the current word, S2 is the word to wich it should be changed
Procedure TMarkov.Correct(S1, S2 : AnsiString);
Var
S1Pos : Integer;
Begin
// WriteLn('Searching for mispelled words');
// WriteLn('{');
// S1Pos := fDict.FindWord(S1);
// If (S1Pos = StartCode) Or (S1Pos = EndCode) Then
// Exit;
// While S1Pos >= 0 Do
// Begin
// WriteLn(' Correcting mispelled word : ', S1, ' to ', S2);
// fDict.Words[S1Pos] := S2;
// S1Pos := fDict.FindWord(S1);
// End;
// WriteLn('}');
Raise Exception.Create('Not yet implemented');
End;
// TLastWordsUsed
Constructor TLastWordsUsed.Create(Size : Cardinal);
Var
Ctrl : Cardinal;
Begin
Inherited Create;
SetLength(fBuffer, Size);
For Ctrl := 0 To (Size - 1) Do
fBuffer[Ctrl] := '';
fPos := 0;
End;
Destructor TLastWordsUsed.Destroy;
Begin
SetLength(fBuffer, 0);
fPos := -1;
Inherited Destroy;
End;
Procedure TLastWordsUsed.WordSaid(W : String);
Begin
If fPos >= Length(fBuffer) Then
fPos := 0;
fBuffer[fPos] := W;
Inc(fPos);
End;
Function TLastWordsUsed.WordUsed(W : String): Integer;
Var
Ctrl : Cardinal;
Temp : Integer;
Begin
Temp := 0;
For Ctrl := 0 To (Length(fBuffer) - 1) Do
If fBuffer[Ctrl] = W Then
Inc(Temp);
WordUsed := Temp;
End;
Procedure TLastWordsUsed.Show();
Var
Ctrl : Cardinal;
Begin
For Ctrl := 0 To (Length(fBuffer) - 1) Do
If fBuffer[Ctrl] <> '' Then
Write(fBuffer[Ctrl], ' ');
WriteLn;
End;
End.
------------------------------------------------------------------------------------------
This code will use a not hidden markov model, so its not usefull for our porpuses as it is, it's just a example (for those who can understand object pascal) of a Markov model.
In our case the way it will work is described as a Hidden Markov Model, where we have already a markov chain filled and need to infer what the current pattern matches best (its a pattern recognition tool).
Looking at the way this sample code iterates trought the markov chain, we can see that a correctly phrase will iterate until the >> marker is found, changing from fixed startphrase/endphrase markers into polimorfic ones, where each end of phrase markers triggers a event by the NPC, we can solve three problems :
Incomplete phrases can be infered from the markov chain, tring to generate text foreward from the incompletion point (Ie : "I am nice" is stored in the markov chain, the user types "I am"; "I am" fails to reach a end state, but iterating the markov chain towards the most probably next state, the system choses that the most likely word to follow "am" is "nice", and them tries to reach a end state from there).
Having polimorft end states means that each endstate is itself a tag about the semantic mean of this phrase, wich can be used by the NPC to trigger some event. Using polimorfic phrase start markers we can have multiple equal phrases that means different things in different contexts (the context chooses the phrase start to be used, them the markov chain iteration will end up in a statistically correct end of phrase marker with the context that this phrases triggers).
Each NPC should have his own markov chain (and be able to reconize a limited set of phrases in order to decrease server load), and receive "education" about phrases and meanings...
All this works out of the box, i just need to translate this into a C++ class and do some modifications...
The most basic difference is that in the example code, the user defines the markov states, where in our pattern recognition system the markov chain will run already "educated" and will only be used to try to solve incomplete phrases and determine the meaning of them (IE : They will stay locked in-game, only NPC dialog editors should be able to modify the markov table and its triggers [and what those triggers means]...)
Ahh.. and whit some randomness in the way the server solves each phrase will be an incentive for players to be more concise and carefull about what to say to a NPC... Saying incomplete phrases could lead to the NPC to react in ways not expected (or worse, make the NPC take offense from the phrase said)... Imagine a player saying "I would like to ..." and the NPC taking this as a "I would like to FIGHT YOU UNTIL DEATH !" LOL, NOW RUN GUY RUN ! I think the human misunderstaind will be a nice realistic touch...
------------------------------------------------------------------------------------------
// Copyright (C) 2006 Jorge Aldo G. de F. Junior.
// This file received some modifications from FPCBot Authors
// This file is part of Pascal Markov Text Generation Library (Called PMFGL).
// PMFGL is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
// PMFGL is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with PMFGL; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Unit MarkovDict;
{$mode objfpc}{$h+}
Interface
Uses
Classes, SysUtils;
Const
ccStartToken = '<<';
ccEndToken = '>>';
ccImpulse = 10;
Type
{ TMarkovDict }
TMarkovDict = Class
Private
fName : String;
fWords : TStringList;
function GetCount: Integer;
function GetMarkovCount: Integer;
function GetTransitions(index: integer): TStringList;
Public
Constructor Create(Arq : String);
Destructor Destroy; Override;
Procedure Load;
Procedure Flush;
Function InsertWord(St : AnsiString): Integer;
Function FindWord(St : AnsiString): Integer;
Procedure ImpulsePair(S1, S2 : AnsiString);
Property Words : TStringList Read fWords Write fWords;
Property Transitions[Index : Integer]: TStringList Read GetTransitions;
Property Count : Integer Read GetCount;
Property MarkovCount : Integer Read GetMarkovCount;
End;
Implementation
Function TMarkovDict.GetCount: Integer;
Begin
Result := fWords.Count;
End;
Function TMarkovDict.GetMarkovCount: Integer;
Var
Ctrl : Integer;
TransitionList : TStringList;
Begin
Result := 0;
For Ctrl := 0 To fWords.Count-1 Do
Begin
TransitionList := Transitions[Ctrl];
If Assigned(TransitionList) Then
Result := Result + TransitionList.Count;
End;
End;
Function TMarkovDict.GetTransitions(Index: Integer): TStringList;
Begin
Result := TStringList(fWords.Objects[Index]);
End;
Constructor TMarkovDict.Create(Arq : String);
Begin
Inherited Create;
fName := Arq;
fWords := TStringList.Create;
fWords.Sorted := true;
fWords.Duplicates := dupIgnore;
End;
Destructor TMarkovDict.Destroy;
Var
Ctrl : Integer;
Begin
fName := '';
For Ctrl := 0 To fWords.Count -1 Do
If Assigned(Transitions[Ctrl]) Then
Transitions[Ctrl].Free;
fWords.Free;
Inherited Destroy;
End;
// Loads the list from a file
Procedure TMarkovDict.Load;
Var
Handler : TextFile;
Word : String;
Line : String;
NewTransitions : TStringList;
HitCount : PtrInt;
Begin
If Not(FileExists(fName)) Then
Exit;
AssignFile(Handler, fName);
Reset(Handler);
While Not(Eof(Handler)) Do
Begin
ReadLn(Handler, Word);
ReadLn(Handler, Line);
If Length(Line) > 0 Then
Begin
NewTransitions := TStringList.Create;
Repeat
Readln(Handler, HitCount);
NewTransitions.AddObject(Line, TObject(HitCount));
ReadLn(Handler, line);
Until Length(Line) = 0;
End
Else
NewTransitions := Nil;
FWords.AddObject(Word, NewTransitions);
End;
CloseFile(Handler);
End;
// Saves the list to a file
Procedure TMarkovDict.Flush;
Var
W1,
W2 : Integer;
MarkovText : TextFile;
Transition : TStringList;
Begin
Assign(MarkovText, fName);
ReWrite(MarkovText);
For W1 := 0 to fWords.Count-1 Do
Begin
WriteLn(MarkovText, fWords[W1]);
Transition:=Transitions[W1];
If Assigned(Transition) Then
Begin
For W2 := 0 to Transition.Count -1 Do
Begin
WriteLn(MarkovText, Transition[W2]);
WriteLn(MarkovText, PtrInt(Transition.Objects[W2]));
End;
End;
WriteLn(MarkovText);
End;
CloseFile(MarkovText);
End;
// adds a new word to the list returning his value or
// returns the value of a word if it is already in the list
Function TMarkovDict.InsertWord(St : AnsiString): Integer;
Begin
Result := FWords.Add(St);
End;
// Finds a word in the list, returning -1 if the word
// isn't already there
Function TMarkovDict.FindWord(St : AnsiString): Integer;
Begin
FindWord := fWords.IndexOf(St);
End;
Procedure TMarkovDict.ImpulsePair(S1, S2: AnsiString);
Var
W1,
W2 : Integer;
TransitionList : TStringList;
Begin
W1 := FWords.IndexOf(S1);
If W1 < 0 Then
W1 := FWords.Add(S1);
TransitionList := Transitions[W1];
If Not(Assigned(TransitionList)) Then
Begin
TransitionList := TStringList.Create;
FWords.Objects[W1] := TransitionList;
End;
W2 := TransitionList.IndexOf(S2);
If W2 < 0 Then
W2 := TransitionList.Add(S2);
TransitionList.Objects[w2] := TObject(ptrint(TransitionList.Objects[w2]) + ccImpulse);
End;
End.
------------------------------------------------------------------------------------------
Code for Markov.pas file :
------------------------------------------------------------------------------------------
// Copyright (C) 2006 Jorge Aldo G. de F. Junior.
// This file received some modifications from FPCBot Authors
// This file is part of Pascal Markov Text Generation Library (Called PMFGL).
// PMFGL is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
// PMFGL is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with PMFGL; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Unit Markov;
{$mode objfpc}{$h+}
Interface
Uses MarkovDict, Classes, SysUtils;
Type
// Holds the last words used by the engine in order to
// avoid recursions
TLastWordsUsed = Class
Private
fBuffer : Array Of String;
fPos : Integer;
Public
Constructor Create(Size : Cardinal);
Destructor Destroy; Override;
Procedure WordSaid(W : String);
Function WordUsed(W : String): Integer;
Procedure Show;
End;
// A generic markov text generator
{ TMarkov }
TMarkov = Class
Private
fErrorMargin : Byte; // Deviation
fThreshold : Byte; // Limiar level for a construction to be considered "right"
fDict : TMarkovDict;
fUsed : TLastWordsUsed;
fUserUsed : TLastWordsUsed;
Function StartCode : Integer;
Function EndCode : Integer;
// Number of words in the dictionary
Function WordsCount : Integer;
// Number of entries in the markov chain
Function MarkovCount : Integer;
// A random word to follow K, with at least E% usage
Function RandomWord(const S1: string; E : Integer): string;
Public
// D = Markov data file
// M = Deviation
// T = Threshold
Constructor Create(D : String; M, T : Byte);
Destructor Destroy; Override;
// Use talkto when you need to write a new phrase into the database
// without generating a response (I.E. Write-Only)
Procedure TalkTo(W : TStringList);
// Use talkfrom when you want to generate a new phrase from the
// current database
Function TalkFrom: AnsiString;
// Use talk to write to and generate a response at the same time
Function Talk(W : TStringList): AnsiString;
// Change a word in the dictionary, correcting a mispelling ?
// S1 is the current word, S2 is the word to wich it should be changed
Procedure Correct(S1, S2 : AnsiString);
// Error margin is the max deviation from a central threshold level
Property ErrorMargin : Byte Read fErrorMargin Write fErrorMargin;
// Threshold is the level where a construction is marked as good
// language one
Property Threshold : Byte Read fThreshold Write fThreshold;
// The ammount of words in the dictionary
Property DictionaryWords : Integer Read WordsCount;
// The ammount of words in the markov chain
Property EntriesMarkov : Integer Read MarkovCount;
End;
Implementation
// Generates (guaranteed) a random number wich value in the [X, Y] Range
Function RandomFromTo(X, Y : Integer): Integer;
Var
Tmp : Integer;
Begin
Repeat
Tmp := X + Random(Y - X);
Until ((Tmp >= X) and (Tmp <= Y));
RandomFromTo := Tmp;
End;
// TMarkov
Constructor TMarkov.Create(D : String; M, T : Byte);
Begin
Inherited Create;
fDict := TMarkovDict.Create(D);
fDict.Load;
// fTable := TMarkovTable.Create(P);
// fTable.Load;
fUsed := TLastWordsUsed.Create(128);
fUserUsed := TLastWordsUsed.Create(64);
fErrorMargin := M;
fThreshold := T;
// make sure the startToken and EndToken are part of it.
fDict.InsertWord(ccStartToken);
fDict.InsertWord(ccEndToken);
WriteLn('Phrase start code : ', StartCode);
WriteLn('Phrase end code : ', EndCode);
End;
Destructor TMarkov.Destroy;
Begin
fDict.Flush;
fDict.Free;
fUsed.Free;
fUserUsed.Free;
Inherited Destroy;
End;
Procedure TMarkov.TalkTo(W : TStringList);
Var
Ctrl : Integer;
Begin
If Assigned(W) And (W.Count > 0) Then
Begin
// Puts every word in the dictionary if not already
// and mark as context input
For Ctrl := 0 To W.Count - 1 Do
Begin
fDict.InsertWord(W[Ctrl]);
fUserUsed.WordSaid(W[Ctrl]);
End;
// Reinforce start word behaviour
fDict.ImpulsePair(ccStartToken, W[0]);
If W.Count > 1 Then
// Insert/reinforce word pairs in the transition table
For Ctrl := 0 To W.Count - 2 Do
fDict.ImpulsePair(W[Ctrl], W[Ctrl + 1]);
// Reinforce end word behaviour
fDict.ImpulsePair(W[W.Count - 1], ccEndToken);
End;
End;
function TMarkov.StartCode: Integer;
begin
Result := fDict.FindWord(ccStartToken);
end;
function TMarkov.EndCode: Integer;
begin
Result := fDict.FindWord(ccEndToken);
end;
// Number of words in the dictionary
Function TMarkov.WordsCount : Integer;
Begin
WordsCount := fDict.Count;
End;
// Number of entries in the markov chain
Function TMarkov.MarkovCount : Integer;
Begin
MarkovCount := fDict.MarkovCount;
End;
Function TMarkov.RandomWord(Const S1 : String; E : Integer): string;
Var
Ctrl : Integer;
Highest : Integer;
Partial : Array Of Integer;
Possible : Array Of Boolean;
Coeff : Double;
Possibles : Integer;
Chosen : Int64;
Transitions : TStringList;
Begin
Result := ccEndToken;
Transitions := FDict.Transitions[FDict.FindWord(S1)];
If (Transitions = Nil) Or (Transitions.Count = 0) Then
Exit;
Highest := 0;
Possibles := 0;
SetLength(Partial, Transitions.Count);
SetLength(Possible, Transitions.Count);
Write(' Transition matrix with ', Transitions.Count ,' coefficients : ');
// Generates the transition matrix for K
// from K to each possible word
For Ctrl := 0 to Transitions.Count - 1 Do
Begin
// Coefficient : four words in the context cancels out a word being too much used
Coeff := (fUserUsed.WordUsed(Transitions[Ctrl]) + 1) / ((fUsed.WordUsed(Transitions[Ctrl]) + 1) * 4);
Partial[Ctrl] := Round(Ptrint(Transitions.Objects[Ctrl]) * Coeff * 100);
If Ctrl = EndCode Then
Partial[Ctrl] := Round(Partial[Ctrl] * 0.75);
If Partial[Ctrl] > Highest Then
Highest := Partial[Ctrl];
End;
WriteLn(Highest, ' highest.');
// Transforms from "hits" into percentual (%) values
// relative to the highest scoring word in the markov transition table
// If a value is higher than the threshold (E) then this is a "possible"
// word to follow K in a normal text
Write(' Apply thresholds : ');
For Ctrl := 0 to Transitions.Count - 1 Do
Begin
If Partial[Ctrl] > 0 Then
Possible[Ctrl] := Round((Partial[Ctrl]) / (Highest / 100)) > E
Else
Possible[Ctrl] := False;
If Possible[Ctrl] Then
Inc(Possibles);
End;
WriteLn(Possibles, ' words can follow.');
If Possibles = 0 Then
Exit;
// Choose a random from the possible words
Chosen := Random(Possibles); // number to skip
Write(' Select keyword : ');
Ctrl := 0;
While (Chosen >= Ord(Possible[Ctrl])) Do
Begin
If Possible[Ctrl] Then
Dec(Chosen);
Inc(Ctrl);
end;
Result := Transitions[Ctrl];
WriteLn(ctrl, ' ', Result, ' selected.');
End;
Function TMarkov.TalkFrom: AnsiString;
Var
Temp : AnsiString;
Key : Integer;
AWord : AnsiString;
Begin
// Start a fresh phrase
WriteLn(' TalkFrom');
WriteLn(' {');
AWord := ccStartToken;
Temp := '';
Repeat
AWord := RandomWord(AWord, RandomFromTo(fThreshold - fErrorMargin, fThreshold + fErrorMargin));
If AWord <> ccEndToken Then
Begin
// Append the next word in the phrase
Temp := Temp + AWord + ' ';
fUsed.WordSaid(AWord);
End;
// The next word will be the most probable from the current one
Until AWord = ccEndToken;
WriteLn(' }');
TalkFrom := Temp;
End;
Function TMarkov.Talk(W : TStringList): AnsiString;
Var
Temp : Ansistring;
Ctrl : Integer;
Ctrl2 : Integer;
Begin
Temp := '';
Ctrl2 := 0;
If Assigned(W) Then
Begin
TalkTo(W);
WriteLn;
WriteLn('Generating text');
WriteLn('{');
WriteLn(' Input words : ');
WriteLn(' {');
Write(' ');
For Ctrl := 0 To W.Count - 1 Do
Write(W[Ctrl], ' ');
WriteLn;
WriteLn(' }');
WriteLn(' Context box');
WriteLn(' {');
Write(' ');
fUserUsed.Show;
WriteLn(' }');
WriteLn(' Used words');
WriteLn(' {');
Write(' ');
fUsed.Show;
WriteLn(' }');
WriteLn(' Iterating');
WriteLn(' {');
While (Temp = '') And (Ctrl2 <= 1024) Do
Begin
Temp := TalkFrom;
Inc(Ctrl2);
WriteLn(' Try : ', Ctrl2);
End;
WriteLn(' }');
WriteLn(' Result = ', Temp);
WriteLn('}');
WriteLn;
End;
Talk := Temp;
End;
// Change a word in the dictionary, correcting a mispelling ?
// S1 is the current word, S2 is the word to wich it should be changed
Procedure TMarkov.Correct(S1, S2 : AnsiString);
Var
S1Pos : Integer;
Begin
// WriteLn('Searching for mispelled words');
// WriteLn('{');
// S1Pos := fDict.FindWord(S1);
// If (S1Pos = StartCode) Or (S1Pos = EndCode) Then
// Exit;
// While S1Pos >= 0 Do
// Begin
// WriteLn(' Correcting mispelled word : ', S1, ' to ', S2);
// fDict.Words[S1Pos] := S2;
// S1Pos := fDict.FindWord(S1);
// End;
// WriteLn('}');
Raise Exception.Create('Not yet implemented');
End;
// TLastWordsUsed
Constructor TLastWordsUsed.Create(Size : Cardinal);
Var
Ctrl : Cardinal;
Begin
Inherited Create;
SetLength(fBuffer, Size);
For Ctrl := 0 To (Size - 1) Do
fBuffer[Ctrl] := '';
fPos := 0;
End;
Destructor TLastWordsUsed.Destroy;
Begin
SetLength(fBuffer, 0);
fPos := -1;
Inherited Destroy;
End;
Procedure TLastWordsUsed.WordSaid(W : String);
Begin
If fPos >= Length(fBuffer) Then
fPos := 0;
fBuffer[fPos] := W;
Inc(fPos);
End;
Function TLastWordsUsed.WordUsed(W : String): Integer;
Var
Ctrl : Cardinal;
Temp : Integer;
Begin
Temp := 0;
For Ctrl := 0 To (Length(fBuffer) - 1) Do
If fBuffer[Ctrl] = W Then
Inc(Temp);
WordUsed := Temp;
End;
Procedure TLastWordsUsed.Show();
Var
Ctrl : Cardinal;
Begin
For Ctrl := 0 To (Length(fBuffer) - 1) Do
If fBuffer[Ctrl] <> '' Then
Write(fBuffer[Ctrl], ' ');
WriteLn;
End;
End.
------------------------------------------------------------------------------------------
This code will use a not hidden markov model, so its not usefull for our porpuses as it is, it's just a example (for those who can understand object pascal) of a Markov model.
In our case the way it will work is described as a Hidden Markov Model, where we have already a markov chain filled and need to infer what the current pattern matches best (its a pattern recognition tool).
Looking at the way this sample code iterates trought the markov chain, we can see that a correctly phrase will iterate until the >> marker is found, changing from fixed startphrase/endphrase markers into polimorfic ones, where each end of phrase markers triggers a event by the NPC, we can solve three problems :
Incomplete phrases can be infered from the markov chain, tring to generate text foreward from the incompletion point (Ie : "I am nice" is stored in the markov chain, the user types "I am"; "I am" fails to reach a end state, but iterating the markov chain towards the most probably next state, the system choses that the most likely word to follow "am" is "nice", and them tries to reach a end state from there).
Having polimorft end states means that each endstate is itself a tag about the semantic mean of this phrase, wich can be used by the NPC to trigger some event. Using polimorfic phrase start markers we can have multiple equal phrases that means different things in different contexts (the context chooses the phrase start to be used, them the markov chain iteration will end up in a statistically correct end of phrase marker with the context that this phrases triggers).
Each NPC should have his own markov chain (and be able to reconize a limited set of phrases in order to decrease server load), and receive "education" about phrases and meanings...
All this works out of the box, i just need to translate this into a C++ class and do some modifications...
The most basic difference is that in the example code, the user defines the markov states, where in our pattern recognition system the markov chain will run already "educated" and will only be used to try to solve incomplete phrases and determine the meaning of them (IE : They will stay locked in-game, only NPC dialog editors should be able to modify the markov table and its triggers [and what those triggers means]...)
Ahh.. and whit some randomness in the way the server solves each phrase will be an incentive for players to be more concise and carefull about what to say to a NPC... Saying incomplete phrases could lead to the NPC to react in ways not expected (or worse, make the NPC take offense from the phrase said)... Imagine a player saying "I would like to ..." and the NPC taking this as a "I would like to FIGHT YOU UNTIL DEATH !" LOL, NOW RUN GUY RUN ! I think the human misunderstaind will be a nice realistic touch...
