diff --git a/FindFiles.dll b/FindFiles.dll new file mode 100644 index 0000000..501a8dc Binary files /dev/null and b/FindFiles.dll differ diff --git a/FindFiles_Plugin.png b/FindFiles_Plugin.png new file mode 100644 index 0000000..1dcb16d Binary files /dev/null and b/FindFiles_Plugin.png differ diff --git a/FindFiles_Settings.png b/FindFiles_Settings.png new file mode 100644 index 0000000..bce0dad Binary files /dev/null and b/FindFiles_Settings.png differ diff --git a/src/FindFiles.csproj b/src/FindFiles.csproj new file mode 100644 index 0000000..3cf87e9 --- /dev/null +++ b/src/FindFiles.csproj @@ -0,0 +1,213 @@ + + + + Debug + AnyCPU + 9.0.30729 + 2.0 + {343E9A1B-9868-453C-AE3B-A7F63ABA5A8B} + Library + Properties + FindFilesPlugin + FindFiles + + + + + 3.5 + v3.5 + + + publish\ + true + Disk + false + Foreground + 7 + Days + false + false + true + 0 + 1.0.0.%2a + false + false + true + + + + true + full + true + ..\..\FlashDevelop\FlashDevelop\Bin\Debug\Plugins\ + DEBUG;TRACE + prompt + 4 + false + AllRules.ruleset + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + AllRules.ruleset + + + x86 + bin\Debug\ + TRACE + true + AllRules.ruleset + + + x86 + bin\Release\ + TRACE + true + AllRules.ruleset + + + + + + + + + + + + + + + + + True + True + Resources.resx + + + + Form + + + FindFilesForm.cs + + + + Form + + + QuickOutlineForm.cs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {EFD07485-9A64-4EEC-94E7-ACBD4DA5CA93} + FlashDevelop + False + + + {61885F70-B4DC-4B44-852D-5D6D03F2A734} + PluginCore + False + + + {4EBF2653-9654-4E40-880E-0046B3D6210E} + ASCompletion + + + + + FindFilesForm.cs + + + QuickOutlineForm.cs + + + ResXFileCodeGenerator + Resources.Designer.cs + + + + copy "$(ProjectDir)bin\$(ConfigurationName)\FindFiles.dll" "$(SolutionDir)\FlashDevelop\Bin\Debug\Plugins" + + + + + + + + + + False + .NET Framework 3.5 SP1 Client Profile + false + + + False + .NET Framework 3.5 SP1 + true + + + 
// File: src/Forms/FindFilesForm.Designer.cs
namespace FindFilesPlugin
{
    /// <summary>
    /// Designer-generated half of the FindFilesForm partial class: declares the
    /// form's controls and builds their layout in <see cref="InitializeComponent"/>.
    /// </summary>
    partial class FindFilesForm
    {
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing && (components != null))
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.components = new System.ComponentModel.Container();
            System.ComponentModel.ComponentResourceManager resources = new System.ComponentModel.ComponentResourceManager(typeof(FindFilesForm));
            this.label1 = new System.Windows.Forms.Label();
            this.textBox = new System.Windows.Forms.TextBox();
            this.dataGridView = new System.Windows.Forms.DataGridView();
            this.filenameHeader = new System.Windows.Forms.DataGridViewTextBoxColumn();
            this.filepathHeader = new System.Windows.Forms.DataGridViewTextBoxColumn();
            this.labelDirectoryInfo = new System.Windows.Forms.Label();
            this.labelSearchTime = new System.Windows.Forms.Label();
            this.btnReload = new System.Windows.Forms.Button();
            this.toolTipReload = new System.Windows.Forms.ToolTip(this.components);
            this.checkInProjectOnly = new System.Windows.Forms.CheckBox();
            this.toolTipInProjectOnly = new System.Windows.Forms.ToolTip(this.components);
            ((System.ComponentModel.ISupportInitialize)(this.dataGridView)).BeginInit();
            this.SuspendLayout();
            // 
            // label1
            // 
            this.label1.AutoSize = true;
            this.label1.Location = new System.Drawing.Point(12, 9);
            this.label1.Name = "label1";
            this.label1.Size = new System.Drawing.Size(72, 13);
            this.label1.TabIndex = 0;
            this.label1.Text = "Search string:";
            // 
            // textBox
            // 
            this.textBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
                        | System.Windows.Forms.AnchorStyles.Right)));
            this.textBox.Font = new System.Drawing.Font("Courier New", 9.75F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(204)));
            this.textBox.Location = new System.Drawing.Point(12, 28);
            this.textBox.Name = "textBox";
            this.textBox.Size = new System.Drawing.Size(560, 22);
            this.textBox.TabIndex = 0;
            this.textBox.TextChanged += new System.EventHandler(this.textBox_TextChanged);
            this.textBox.KeyDown += new System.Windows.Forms.KeyEventHandler(this.textBox_KeyDown);
            // 
            // dataGridView
            // 
            this.dataGridView.AllowUserToAddRows = false;
            this.dataGridView.AllowUserToDeleteRows = false;
            this.dataGridView.AllowUserToResizeRows = false;
            this.dataGridView.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
                        | System.Windows.Forms.AnchorStyles.Left)
                        | System.Windows.Forms.AnchorStyles.Right)));
            this.dataGridView.AutoSizeColumnsMode = System.Windows.Forms.DataGridViewAutoSizeColumnsMode.Fill;
            this.dataGridView.BackgroundColor = System.Drawing.Color.OldLace;
            this.dataGridView.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize;
            this.dataGridView.Columns.AddRange(new System.Windows.Forms.DataGridViewColumn[] {
            this.filenameHeader,
            this.filepathHeader});
            this.dataGridView.Cursor = System.Windows.Forms.Cursors.Arrow;
            this.dataGridView.EnableHeadersVisualStyles = false;
            this.dataGridView.Location = new System.Drawing.Point(12, 56);
            this.dataGridView.MultiSelect = false;
            this.dataGridView.Name = "dataGridView";
            this.dataGridView.ReadOnly = true;
            this.dataGridView.RightToLeft = System.Windows.Forms.RightToLeft.No;
            this.dataGridView.RowHeadersVisible = false;
            this.dataGridView.SelectionMode = System.Windows.Forms.DataGridViewSelectionMode.FullRowSelect;
            this.dataGridView.ShowCellErrors = false;
            this.dataGridView.ShowCellToolTips = false;
            this.dataGridView.ShowEditingIcon = false;
            this.dataGridView.ShowRowErrors = false;
            this.dataGridView.Size = new System.Drawing.Size(560, 194);
            this.dataGridView.StandardTab = true;
            this.dataGridView.TabIndex = 3;
            this.dataGridView.TabStop = false;
            this.dataGridView.CellMouseDoubleClick += new System.Windows.Forms.DataGridViewCellMouseEventHandler(this.dataGridView_CellMouseDoubleClick);
            this.dataGridView.KeyDown += new System.Windows.Forms.KeyEventHandler(this.dataGridView_KeyDown);
            // 
            // filenameHeader
            // 
            this.filenameHeader.FillWeight = 76.14215F;
            this.filenameHeader.HeaderText = "File Name";
            this.filenameHeader.MinimumWidth = 100;
            this.filenameHeader.Name = "filenameHeader";
            this.filenameHeader.ReadOnly = true;
            this.filenameHeader.SortMode = System.Windows.Forms.DataGridViewColumnSortMode.Programmatic;
            // 
            // filepathHeader
            // 
            this.filepathHeader.FillWeight = 123.8579F;
            this.filepathHeader.HeaderText = "File Path";
            this.filepathHeader.MinimumWidth = 100;
            this.filepathHeader.Name = "filepathHeader";
            this.filepathHeader.ReadOnly = true;
            this.filepathHeader.SortMode = System.Windows.Forms.DataGridViewColumnSortMode.Programmatic;
            // 
            // labelDirectoryInfo
            // 
            this.labelDirectoryInfo.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)));
            this.labelDirectoryInfo.AutoSize = true;
            this.labelDirectoryInfo.Location = new System.Drawing.Point(9, 256);
            this.labelDirectoryInfo.Name = "labelDirectoryInfo";
            this.labelDirectoryInfo.Size = new System.Drawing.Size(70, 13);
            this.labelDirectoryInfo.TabIndex = 4;
            this.labelDirectoryInfo.Text = "Directory Info";
            // 
            // labelSearchTime
            // 
            this.labelSearchTime.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
            this.labelSearchTime.Location = new System.Drawing.Point(470, 256);
            this.labelSearchTime.Name = "labelSearchTime";
            this.labelSearchTime.Size = new System.Drawing.Size(122, 26);
            this.labelSearchTime.TabIndex = 6;
            this.labelSearchTime.Text = "Search Time";
            // 
            // btnReload
            // 
            this.btnReload.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
            this.btnReload.BackgroundImage = ((System.Drawing.Image)(resources.GetObject("btnReload.BackgroundImage")));
            this.btnReload.BackgroundImageLayout = System.Windows.Forms.ImageLayout.Stretch;
            this.btnReload.FlatAppearance.BorderSize = 0;
            this.btnReload.FlatStyle = System.Windows.Forms.FlatStyle.Flat;
            this.btnReload.Location = new System.Drawing.Point(547, 2);
            this.btnReload.Name = "btnReload";
            this.btnReload.Size = new System.Drawing.Size(24, 24);
            this.btnReload.TabIndex = 7;
            this.btnReload.TabStop = false;
            this.btnReload.UseVisualStyleBackColor = true;
            this.btnReload.Click += new System.EventHandler(this.btnReload_Click);
            // 
            // toolTipReload
            // 
            this.toolTipReload.AutoPopDelay = 15000;
            this.toolTipReload.InitialDelay = 250;
            this.toolTipReload.IsBalloon = true;
            this.toolTipReload.ReshowDelay = 100;
            this.toolTipReload.ToolTipIcon = System.Windows.Forms.ToolTipIcon.Info;
            this.toolTipReload.ToolTipTitle = "Reload Directories";
            // 
            // checkInProjectOnly
            // 
            this.checkInProjectOnly.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
            this.checkInProjectOnly.AutoSize = true;
            this.checkInProjectOnly.Location = new System.Drawing.Point(446, 8);
            this.checkInProjectOnly.Name = "checkInProjectOnly";
            this.checkInProjectOnly.Size = new System.Drawing.Size(95, 17);
            this.checkInProjectOnly.TabIndex = 8;
            this.checkInProjectOnly.Text = "In Project Only";
            this.checkInProjectOnly.UseVisualStyleBackColor = true;
            this.checkInProjectOnly.CheckedChanged += new System.EventHandler(this.checkInProjectOnly_CheckedChanged);
            // 
            // toolTipInProjectOnly
            // 
            this.toolTipInProjectOnly.AutoPopDelay = 15000;
            this.toolTipInProjectOnly.InitialDelay = 250;
            this.toolTipInProjectOnly.IsBalloon = true;
            this.toolTipInProjectOnly.ReshowDelay = 100;
            this.toolTipInProjectOnly.ToolTipIcon = System.Windows.Forms.ToolTipIcon.Info;
            this.toolTipInProjectOnly.ToolTipTitle = "In Project Only";
            // 
            // FindFilesForm
            // 
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.ClientSize = new System.Drawing.Size(584, 275);
            this.Controls.Add(this.checkInProjectOnly);
            this.Controls.Add(this.btnReload);
            this.Controls.Add(this.labelSearchTime);
            this.Controls.Add(this.labelDirectoryInfo);
            this.Controls.Add(this.dataGridView);
            this.Controls.Add(this.textBox);
            this.Controls.Add(this.label1);
            this.KeyPreview = true;
            this.MaximizeBox = false;
            this.MinimizeBox = false;
            this.MinimumSize = new System.Drawing.Size(350, 200);
            this.Name = "FindFilesForm";
            this.ShowIcon = false;
            this.ShowInTaskbar = false;
            this.StartPosition = System.Windows.Forms.FormStartPosition.CenterScreen;
            this.Text = "Find Files";
            this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.OpenResourceForm_FormClosing);
            this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.OpenResourceForm_KeyDown);
            ((System.ComponentModel.ISupportInitialize)(this.dataGridView)).EndInit();
            this.ResumeLayout(false);
            this.PerformLayout();

        }

        #endregion

        // "Search string:" caption above the search box.
        private System.Windows.Forms.Label label1;
        // Search input; wired to textBox_TextChanged and textBox_KeyDown.
        private System.Windows.Forms.TextBox textBox;
        // Read-only, single-select result grid with the two columns below.
        private System.Windows.Forms.DataGridView dataGridView;
        // "File Name" column (index 0 in the grid).
        private System.Windows.Forms.DataGridViewTextBoxColumn filenameHeader;
        // "File Path" column (index 1 in the grid).
        private System.Windows.Forms.DataGridViewTextBoxColumn filepathHeader;
        // Bottom-left status label (initial text "Directory Info").
        private System.Windows.Forms.Label labelDirectoryInfo;
        // Bottom-right status label (initial text "Search Time").
        private System.Windows.Forms.Label labelSearchTime;
        // Image button wired to btnReload_Click.
        private System.Windows.Forms.Button btnReload;
        // Balloon tooltip titled "Reload Directories".
        private System.Windows.Forms.ToolTip toolTipReload;
        // "In Project Only" check box wired to checkInProjectOnly_CheckedChanged.
        private System.Windows.Forms.CheckBox checkInProjectOnly;
        // Balloon tooltip titled "In Project Only".
        private System.Windows.Forms.ToolTip toolTipInProjectOnly;
    }
}
// File: src/Forms/FindFilesForm.cs
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Windows.Forms;
using PluginCore;
using System.Data;
using System.IO;
using Win32;
using System.Threading;

namespace FindFilesPlugin
{
    /// <summary>
    /// Dialog with a search box and a two-column result grid (file name, file path).
    /// The file list is loaded once on a worker thread into the static
    /// <see cref="cashedFiles"/> cache and reused by later instances; typing in the
    /// search box re-runs the search through a <c>SearchManager</c>, and Enter or a
    /// double-click opens the selected file via <c>PluginBase.MainForm</c>.
    /// </summary>
    public partial class FindFilesForm : Form
    {
        // Set at the end of OnShown; suppresses the reload that a programmatic
        // change of checkInProjectOnly would otherwise trigger during start-up.
        private bool isInitialized = false;

        // Last search string; static so it is restored the next time the form opens.
        private static string SearchText = "";
        private SearchManager searchManager;

        private Font nameFont;
        private Font pathFont;
        private PluginMain plugin;
        private Settings pluginSettings;
        private HiPerfTimer timer;

        private String directoryInfoText;

        // Snapshot of checkInProjectOnly.Checked taken on the UI thread right
        // before the loader thread starts, so the worker never touches the control.
        private bool loadInProjectOnly;

        // File cache shared by all instances of the form (name kept as-is: it is public).
        public static List<string> cashedFiles = new List<string>();
        private static volatile EventHandler itemsLoadedDelegate;
        private static volatile bool filesLoading;
        private static Thread fileLoaderThread;

        /// <summary>
        /// Creates the form, restores the saved window and column sizes and
        /// prepares the status text describing the searched directories.
        /// </summary>
        /// <param name="plugin">Owning plugin; supplies settings and the file list.</param>
        /// <exception cref="ArgumentNullException"><paramref name="plugin"/> is null.</exception>
        public FindFilesForm(PluginMain plugin)
        {
            if (plugin == null)
            {
                throw new ArgumentNullException("plugin");
            }

            this.plugin = plugin;
            pluginSettings = plugin.Settings as Settings;
            InitializeComponent();

            this.toolTipReload.SetToolTip(this.btnReload, "\nThe FindFiles plugin caches the file list the first time it opens to speed up search time.\nThis button causes the plugin to relaunch and refresh the search results.\nUse this if you added or removed files in the search directories since you first started FindFiles.");
            this.toolTipInProjectOnly.SetToolTip(this.checkInProjectOnly, "\nOnly files in the project directory will be searched. Changing this requires the file cache to be reset.");

            timer = new HiPerfTimer();

            // Restore saved settings
            if (pluginSettings.FindFilesFormSize.Width > MinimumSize.Width)
            {
                Size = pluginSettings.FindFilesFormSize;
            }
            if (pluginSettings.FileNameWidth > 0)
            {
                dataGridView.Columns[0].Width = pluginSettings.FileNameWidth;
            }
            if (pluginSettings.FilePathWidth > 0)
            {
                dataGridView.Columns[1].Width = pluginSettings.FilePathWidth;
            }

            if (PluginBase.CurrentProject == null)
            {
                checkInProjectOnly.Enabled = false;
            }
            else
            {
                checkInProjectOnly.Checked = pluginSettings.SearchInProjectOnly;
            }

            Text = "Find Files " + pluginSettings.SearchFilter;

            nameFont = new Font("Courier New", 10, FontStyle.Bold);
            pathFont = new Font("Courier New", 8, FontStyle.Italic);

            dataGridView.Columns[0].DefaultCellStyle.Font = nameFont;
            dataGridView.Columns[1].DefaultCellStyle.Font = pathFont;

            directoryInfoText = String.Empty;
            if (pluginSettings.SearchDirectory != pluginSettings.DefaultEmptyString
                && Directory.Exists(pluginSettings.SearchDirectory))
            {
                directoryInfoText = pluginSettings.SearchDirectory;
            }

            if ((pluginSettings.SearchProject || pluginSettings.SearchInProjectOnly) && PluginBase.CurrentProject != null)
            {
                // Braces make explicit that the else belongs to the inner test.
                if (directoryInfoText != String.Empty)
                {
                    directoryInfoText = "Project Folder" + " && " + directoryInfoText;
                }
                else
                {
                    directoryInfoText = "Project Folder";
                }
            }

            labelSearchTime.Text = "Search Time: ...";
        }

        /// <summary>
        /// Starts (or reuses) the file list, then restores and selects the previous search text.
        /// </summary>
        protected override void OnShown(EventArgs e)
        {
            base.OnShown(e);
            CreateFileList();
            textBox.Focus();
            textBox.Text = SearchText;
            textBox.Select(0, SearchText.Length);
            isInitialized = true;
        }

        #region FILE LOADING THREADING
        /// <summary>
        /// Creates a fresh search manager and fills it either from the static cache
        /// or, when the cache is empty, from a worker thread while the UI shows a
        /// placeholder row and is disabled.
        /// </summary>
        private void CreateFileList()
        {
            searchManager = new SearchManager(plugin, dataGridView);

            if (cashedFiles.Count == 0)
            {
                btnReload.Enabled = false;
                checkInProjectOnly.Enabled = false;
                labelDirectoryInfo.Text = "Calculating File Count...";
                labelSearchTime.Text = "";

                string updatingCacheText = "Updating file cache, please wait...";
                dataGridView.Rows.Add(updatingCacheText);

                // Resize name column to fit the text
                Font font = dataGridView.Columns[0].DefaultCellStyle.Font;
                Size textSize = TextRenderer.MeasureText(updatingCacheText, font);
                dataGridView.Columns[0].Width = textSize.Width + 10 /* some padding */;
                dataGridView.ClearSelection();
                dataGridView.Enabled = false;

                filesLoading = true;
                // Read the check box here, on the UI thread, not from the worker.
                loadInProjectOnly = checkInProjectOnly.Checked;
                itemsLoadedDelegate = new EventHandler(FilesLoadedCallback);
                fileLoaderThread = new Thread(new ThreadStart(LoadFilesThread));
                // A background thread cannot keep the host process alive on exit.
                fileLoaderThread.IsBackground = true;
                fileLoaderThread.Start();
            }
            else
            {
                filesLoading = false;

                foreach (string file in cashedFiles)
                {
                    searchManager.AddFileToSearchList(file);
                }

                if (SearchText != "")
                {
                    textBox.Text = SearchText;
                    textBox.Select(0, SearchText.Length);
                }

                labelDirectoryInfo.Text = directoryInfoText + " Files: " + searchManager.fileList.Count;
            }
        }

        /// <summary>
        /// Splits the ExcludedDirectories setting on ',' and ';' into trimmed,
        /// non-empty prefixes. Returns an empty array when the setting is null,
        /// blank, or its first entry is the settings' DefaultEmptyString placeholder.
        /// </summary>
        private string[] GetExcludedDirectories()
        {
            string setting = plugin.settingObject.ExcludedDirectories;
            if (String.IsNullOrEmpty(setting))
            {
                return new string[0];
            }

            // Empty entries must be dropped: StartsWith("") is true for every
            // path, so a stray separator would otherwise exclude all files.
            string[] parts = setting.Split(new char[] { ',', ';' }, StringSplitOptions.RemoveEmptyEntries);
            List<string> prefixes = new List<string>(parts.Length);
            foreach (string part in parts)
            {
                string trimmed = part.Trim();
                if (trimmed.Length > 0)
                {
                    prefixes.Add(trimmed);
                }
            }

            if (prefixes.Count > 0 && prefixes[0] == plugin.settingObject.DefaultEmptyString)
            {
                return new string[0];
            }
            return prefixes.ToArray();
        }

        /// <summary>
        /// Worker-thread entry point: fetches the file list from the plugin, drops
        /// files under excluded directories, publishes the result to the cache and
        /// the search manager, then notifies the UI thread.
        /// </summary>
        private void LoadFilesThread()
        {
            try
            {
                List<string> files = plugin.GetFiles(loadInProjectOnly);
                if (files == null)
                {
                    files = new List<string>();
                }

                // Remove any files that are in the excluded directories
                string[] excludedDirectories = GetExcludedDirectories();
                if (excludedDirectories.Length > 0)
                {
                    files.RemoveAll(delegate(string file)
                    {
                        foreach (string excludedDir in excludedDirectories)
                        {
                            if (file.StartsWith(excludedDir, StringComparison.Ordinal))
                            {
                                return true;
                            }
                        }
                        return false;
                    });
                }

                cashedFiles = files;
                foreach (string file in files)
                {
                    searchManager.AddFileToSearchList(file);
                }

                // Copy the static field once: FormClosing may null it concurrently.
                EventHandler loaded = FindFilesForm.itemsLoadedDelegate;
                if (loaded != null)
                {
                    try
                    {
                        Invoke(loaded);
                    }
                    catch (InvalidOperationException)
                    {
                        // The form's handle is gone (closed or disposed, which also
                        // surfaces as ObjectDisposedException); nobody to notify.
                    }
                }
            }
            catch (ThreadAbortException)
            {
                // Aborted from FormClosing: discard the partially built cache.
                PluginMain.ClearCachedFiles();
                FindFilesForm.itemsLoadedDelegate = null;
            }
        }

        /// <summary>
        /// Runs on the UI thread once loading finished: re-enables the controls,
        /// updates the status labels and performs the pending search.
        /// </summary>
        private void FilesLoadedCallback(object sender, EventArgs e)
        {
            btnReload.Enabled = true;

            if (PluginBase.CurrentProject == null)
            {
                checkInProjectOnly.Enabled = false;
            }
            else
            {
                checkInProjectOnly.Enabled = true;
                checkInProjectOnly.Checked = pluginSettings.SearchInProjectOnly;
            }

            labelDirectoryInfo.Text = directoryInfoText + " Files: " + searchManager.fileList.Count;
            labelSearchTime.Text = "Search Time: ...";

            dataGridView.Enabled = true;
            RefreshDataGrid();
            filesLoading = false;
        }
        #endregion

        /// <summary>
        /// Clears the grid and, when the search box is not empty, runs the search
        /// and shows the elapsed time, colour-coded by how long it took.
        /// </summary>
        private void RefreshDataGrid()
        {
            dataGridView.Rows.Clear();
            if (textBox.Text.Length > 0)
            {
                timer.Start();
                searchManager.DoSearch(textBox.Text);
                timer.Stop();
                labelSearchTime.Text = "Search Time: " + String.Format("{0:0.###}", timer.Duration);

                // Each branch is only reached when the previous bound failed,
                // so the lower bounds do not need to be re-tested.
                if (timer.Duration < 0.25)
                {
                    labelSearchTime.ForeColor = Color.Green;
                }
                else if (timer.Duration < 0.5)
                {
                    labelSearchTime.ForeColor = Color.Blue;
                }
                else if (timer.Duration < 0.8)
                {
                    labelSearchTime.ForeColor = Color.SaddleBrown;
                }
                else
                {
                    labelSearchTime.ForeColor = Color.Red;
                }
            }
            else
            {
                labelSearchTime.Text = "Search Time: ...";
                labelSearchTime.ForeColor = Color.Black;
            }

            if (dataGridView.Rows.Count > 0)
            {
                dataGridView.CurrentCell = dataGridView[0, 0];
            }
        }

        /// <summary>
        /// Opens the file of the selected row in the editor and closes the form.
        /// The grid's path cell is prefixed with the search directory or the
        /// project folder, whichever yields an existing file (project checked last).
        /// Does nothing when no row is selected or the row has no path.
        /// </summary>
        private void Navigate()
        {
            if (dataGridView.SelectedRows.Count == 0)
            {
                return;
            }

            string path = dataGridView.SelectedRows[0].Cells[1].Value as string;
            if (String.IsNullOrEmpty(path))
            {
                // e.g. a placeholder row that only fills the first column
                return;
            }

            // Get Full File Path.
            // Plain concatenation is kept on purpose: Path.Combine would discard
            // the folder if the stored path starts with a directory separator.
            if (pluginSettings.SearchDirectory != pluginSettings.DefaultEmptyString)
            {
                if (File.Exists(pluginSettings.SearchDirectory + path))
                {
                    path = pluginSettings.SearchDirectory + path;
                }
            }
            if (PluginBase.CurrentProject != null)
            {
                String projectFolder = Path.GetDirectoryName(PluginBase.CurrentProject.ProjectPath);
                if (File.Exists(projectFolder + path))
                {
                    path = projectFolder + path;
                }
            }

            PluginBase.MainForm.OpenEditableDocument(path);
            Close();
        }

        /// <summary>Form-level keys (KeyPreview): Escape closes, Enter opens the selection.</summary>
        private void OpenResourceForm_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode == Keys.Escape)
            {
                Close();
            }
            else if (e.KeyCode == Keys.Enter)
            {
                e.Handled = true;
                Navigate();
            }
        }

        /// <summary>Forwards Up/Down from the search box to the result grid.</summary>
        private void textBox_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode == Keys.Down)
            {
                dataGridView.Focus();
                SendKeys.Send("{DOWN}");
                e.Handled = true;
            }
            else if (e.KeyCode == Keys.Up)
            {
                dataGridView.Focus();
                SendKeys.Send("{UP}");
                e.Handled = true;
            }
        }

        /// <summary>Re-runs the search when the text changed (or the grid is empty) and no load is in progress.</summary>
        private void textBox_TextChanged(object sender, EventArgs e)
        {
            if (!filesLoading && (SearchText != textBox.Text || dataGridView.RowCount == 0))
            {
                RefreshDataGrid();
            }

            SearchText = textBox.Text;
        }

        /// <summary>Stops a running loader thread and saves the window and column sizes.</summary>
        private void OpenResourceForm_FormClosing(object sender, FormClosingEventArgs e)
        {
            FindFilesForm.itemsLoadedDelegate = null;
            if (fileLoaderThread != null && fileLoaderThread.IsAlive)
            {
                fileLoaderThread.Abort();
            }

            pluginSettings.FindFilesFormSize = Size;
            pluginSettings.FileNameWidth = dataGridView.Columns[0].Width;
            pluginSettings.FilePathWidth = dataGridView.Columns[1].Width;
        }

        private void dataGridView_CellMouseDoubleClick(object sender, DataGridViewCellMouseEventArgs e)
        {
            Navigate();
        }

        /// <summary>
        /// Moves the current row with Up/Down; Up on the first row returns focus
        /// to the search box. Every key is marked handled, as before.
        /// </summary>
        private void dataGridView_KeyDown(object sender, KeyEventArgs e)
        {
            // CurrentRow is null when the grid has rows but no current cell.
            if (dataGridView.RowCount == 0 || dataGridView.CurrentRow == null)
            {
                textBox.Focus();
                e.Handled = true;
                return;
            }

            int rowIndex = dataGridView.CurrentRow.Index;
            if (e.KeyCode == Keys.Down && rowIndex < dataGridView.Rows.Count - 1)
            {
                dataGridView.ClearSelection();
                dataGridView.CurrentCell = dataGridView[0, rowIndex + 1];
                dataGridView.Focus();
            }
            else if (e.KeyCode == Keys.Up && rowIndex > 0)
            {
                dataGridView.ClearSelection();
                dataGridView.CurrentCell = dataGridView[0, rowIndex - 1];
                dataGridView.Focus();
            }
            else if (e.KeyCode == Keys.Up && rowIndex <= 0)
            {
                textBox.Focus();
            }

            e.Handled = true;
        }

        /// <summary>Clears the grid and the file cache, then rebuilds the file list.</summary>
        private void btnReload_Click(object sender, EventArgs e)
        {
            dataGridView.Rows.Clear();
            PluginMain.ClearCachedFiles();
            CreateFileList();
        }

        /// <summary>Saves the new "In Project Only" choice and reloads; ignored until the form has been shown.</summary>
        private void checkInProjectOnly_CheckedChanged(object sender, EventArgs e)
        {
            if (isInitialized)
            {
                pluginSettings.SearchInProjectOnly = checkInProjectOnly.Checked;
                btnReload_Click(sender, e);
            }
        }
    }
}
cz5yAaXWGFXB0s8aVmF6EPmqQddf/mb/AE6pPYhu7twQhaEyNzSH9eYJ4qMA+k7kPrNdoLVrcJIxXNNM + KKetMKLUitLBGG6r9ZF2YBYO3azF4S93YeflSmy+UExA8rH+TC5K/rscBV8sRt7nmVh1PAXLjiZiMQFZ + eDAKafvnIGXfTCTtm47I7WMQUGzDp5Y/vf/IckqrSjvYhOu3gPDre1sruIda/xGcrrJPtwMaQ/q8YqlG + lA3JRF0G4FWHKWQ2r3/hkEwTPrTsZPyKJXBbI8CEDW44eesAGr//Ase/3oe6G9ux52o1gdiIrRfLUH2+ + kI9IRWMeys+uRBFFgwFZSUCWHp2P7EOxyKyLRPr+2UjePxPJe6dj/HoX+BSat4Kw4UnBLtyAB2E6RusX + Ou3nIl9VmI3RgucyCwTvdITlZJ2OAQjiPh3tkK6LoTlmCKA89Skyh9MqLaz9fCG++7+vcfmnRtR/exyf + 36rDsa/24uiXu3GEnhO39+HU93U48vVOVJzNQ+6RDGysz8OOpjJUXlyDwoYcrDqZShGZj6xDMcioo5Qi + IKxuJlV68CTAR4IYK7DSlk7aAEJPFYj91eGaasJHh6dpYjHJBG0CoPV+BESJn+k5Z+s/8lxGb0Y5711o + CocVGvwJ3//jZ3x5/xou/3gW5++cxtnvTuLc3S9w5edGArIbYcX+sJ6rA+GkvhBN7g/RJEUIJ8jDNEQF + w9MHIbkmAuX1K1DUkI2VJ5L5+lh0MJqARCJjfwSmVvu8BWLURlu4JIl50ghkNULAWOEHbRoI8xYAzW+l + kH4G9+ngZYIzPrlS+OVbtp68Jr745jAev/gDd3/7Frd+vYGbv1xB072LuPXgOi7/fAZhRQEwnaUAy0gN + mMxSbtYP631JJZirU57AVeqFyh4Whyk9kU7Vgvg/AyCZoo20rVGouJiLtacyWusjDosoIllU7GPLB1FN + WPB9hPUUtum2jbf1FgbA4kMAjDN7LR++zpLf/IgSKzjn6mAjpQL7evj0Ae79fpcHcefhN7j/+Cc0fH8c + jul6sInXgjRW9b5RpNxi+Smc9rv0xzlyn1AfcdWbIrtWOFH+pXiCEoIyPVB5IQ/FDUuReyKVr4+cwwlU + I9F8r2BM91YzbNcYOwSgl86FuucJHw4vsKIOK8HkCg9+8+zr8bNHePjkVzx4fB9PXzzBwevbYZ2sBodk + Pdgmah3XnMEpdqYBqU3ipCZTlW6YharDZoYAOXsSUH5uBdZQNFYcS6JnARX4HAyjhuhfTAfZvqO3/twO + wF/vsRCBULRfoVbitlrwynm5DqZVBuDJ80c8iOZXf/Hfbz+4AYtkJVglqcI0QX6VQVbXdIxVlLqjzWy9 + x0aTFWEQ1B8rdqcSWy3hSWL1yTTknUxH2CYfvuF9EEA1pRBpMKqB9wG0naLZ4t6ObnkGV91XC2GdrYh1 + n2e/joZnrhmcs4iLkxULOnPqbWvkgjllwTTZClGYfLPxVEWMz/JBTWMJtl8txYYzq6l/LKP+kU1AFhH1 + LuS7PN+x34lCEAFgIpJotWMA7EMpGt0pGnEuq/X+dKJo+OZJEL1tIgYtISG3WPecIJP7rDMAOHeuO9VH + vDRK7ZH1XOLyZGtUnS7Eqe/qsK+pBtsuUf84V8jXWxl1cgaklJrhvN0hralETfSdGqC5gwF4yXmvlEQI + F3M9P7YRAqI9cIXqTgIC73wLOC/X+584XVa3M5tna/qHckusEzR4SvTMsMK1XxrR9OACNcT92H99C3Zf + rcKOSxuom5eQHCmgHrIG6xtWofJsPkaW2FKTa2Gltmc0sZB0kg6LwkvObpbBb0aTlH4RJvYY93cbMsvu + 7UcRuU1s5fN3a9/9v86cbj628zW/sZ+vB8sYdSyqjSMg51D/3VGSI7XYe20zdl2pwvZL67H5fIssqSJt + 
xRhqSJ5hi4ptBcHEn2WILskJnRccyePfDKjbaQzpDaK5s/qZnPnHNqec2LWCbf9efcZwn5rEyS+QxKo+ + ZSCGpppjz4XNuHSvAZ/frsPBGzsISM1rWVJzoYgAbcBUKmivdabtANjBaqo+pJN1n3MkjH5zjZFgcJoI + BsNUoO3Tt9lwtlyFaJmMQldPubPrFUI4DXGU3GbTuQqvzCOVMad4Ai7eraeufpL01V5Kq62UVpt4hVt7 + uYIXiu4UhbbewCSF7XQBaB54ziLwv9HJXphU6wb/UikopaDnNgAGYxSemM3vH2e4ROaTzm6sq+u0Z8m4 + SONUr1jFasA+XoCSw6tw7f55nP72CK90WX3sofqou74N83eH8RKeqeJAAmA3SwCb6fp8BH4PSHDFyi8S + 4VlozIsov3wp36oFQwbAOET5G3Fqz2Fd3Vxn19PM0c04pl+EReyA3y2iB2D8Ui+cvF6Hy3xaHSAg23Gg + aTulVy08C0x5mcEmwYFzDJisfsYAPPKLH4TdX1YgsGQgfIto4mLjHlX60CwzGA1Xg8bQ3jCY2uuIYBGn + 39mNdXVd/ymcolG0XKnJXPlmqxhNLKqeh0s/NuLMnRM49vUenPh6Py/JSSnww499pJCB+JMjj+aRb7wj + am+ux5Kj8zCU5lkmaZmQYtJ1FKEdlCCCYOgA6I3o95c4um+uaKlM765usLPrFadz1sJI2Qbh7F7wSbdD + 7ekaXjDW3zmGxjun+AlueLkEjjGGsJ8rJADhgic+8Q6obSpHJVHX+PWDeUnLU1arm8BCxoYam2kCqg9l + GI5X+E2SohhmmCMj09mNdXWdZjgXYhaj+Ks0WgXRBaGov3EcNx5cRnH9UngUGcIp1pCBeMr6wFPvBHvs + urmROuEarKHp6V3efTN828EnVwIzauPMvDKdNuCKaVZvu65urrPr5UM4OcOoPqvEUf3+cl1girJ9+fj2 + 168wpsoeDgkGGBQvesxRHj31ne+A/V9V8x2whkbCkE1eHxRSbDpj9cHU4JCFJjAO0KC5td8rw3C5bZRW + qp3dWFfXUScXCyJ6HDOYLYvINZOwam8GnBIFcJ5v9JijYvjTZ4EDDt7a1tL9aJ4trl9GtfCGd98VU6/r + gyYlZ6oPNrsajlN8RnN0GqVVp/TRh0Copny8SarP5Eabxyr94JFmAcf5QgxOEv/BOcwVPvNOcsDRb3bR + 6RdR+84H64BsTmUGVfsW/iG3LbCCbEOqF1vqH0bk65hPV/3RKLlHlz1Po7Se5rpTZO9JUpRmkDbrsLaU + Z3A9xTH9FtokaD5zTTV+xDlGGz7zSbHHie/2kqtQyrsKm86v438Oq/blXTXGSh1NR6w+GFuNJT+U2YVm + Y7WhO6If9GO61/SK7Xgj7SNAUetjNkPljkmgFszIrJLMUTtvmt3ro5KGurmee4ZJJUeV/Nw3ZSBO3znI + t+42EKwWas4V8xt/zUrv6HKWSqwmGC8PThbDfBzNvZOVX9KQs4KGnE5TrVFk351mozXJrpTyvcd8vA7I + PG42jeufJ8z5+PtwTnGiFz6pdmj44ViLrG0DcYEAnC/hJyXWG/zIfGofBbZ55hR4kPViOVmXqFUR4rn9 + 9pH06LTMZlEwiOseKaCpzDXFhO+wvENXZg1HoklmzZuHq/0qSu4R1FHhc9SkXnin2+LCvVPYd33za22+ + 5UIpRaAIWy+UkaM2l0CI3qoH5hQ4xopIcuiADqF5WLF4eFfZRRD/WYzRZKVm+wgD2ji5D63ONu8LUW0x + A9mGRJvhOAU6HLljNJe8pwQ453lGL7wybKjbNaCONEebLmcDBgPBtPmOSxWYXu3/1qDN2+Xkk1Ih8REg + //Qn04W9XDoDgoq0h2msfI2I3AmHaEO+SfLWSbsU5SmbrMZRxHRDFprCMpSsxGkqL4jp0lWSuO5tn8O5 + 
JIpfeqVLcf1+Iw7f3PlBECwK7GHuGXML2izyNjr1zZOSsNKHaZhKs1miQqnFUrnBwuz3x03auIIkVTFe + Eqn2rdUUPbilm/Cm1bu+z2sgTAlQVJh89qYGah2mD5t5mkdolH3NUhxx6UvPDCluPriE42QTMhCHSPkd + urkLB5t2YM+1TdhCjMSisf1SOabV+PGylmmSNrnBIsE2wYxg5i6TzIV0rtpz46i+x3UjuhWRbN5lFNG3 + gdjlhfVUXRB1w5sYi7EXnzb8yTOme5/teGOLImQ32wBWMRp3JSv69msfZW5wsvFfQzPNcfvhdZy6dRCn + yYU7e+ckdlxdT0auO3Y3VWJ/05ZWECX8gMFcZSY3mLTlKbb1pEbxNzTW/Gm5k6PNFCOJRQyMEPJF6Zpi + DD92o0OF2nYxwl7PIhpU7kjzL+spb0C0sJwN3QQZMmp9brJQ9j1q5SiHm4dmmuKHP77FlXuNOEtTUcS2 + MZDEKsMoRBFuWWLsaap+DYIN3lvJRcg9kcJ7p6xPtI9Gmx3IwDBGCSR/kz3sZ/4KqtXfbNs4O/XgDYMx + boMLRpc5EAib14fC3oPlv/kMuiBM7jnxQ/XFUR42ey22xONXj5B7KhW2yRqwCFcjZhG+pEZx1XKWFtyz + RW8iQdS6ifUI6tbV5wvo5oVFQ8TXRpvP32HTo1RhG2deD+stY8udEVrlhclVwzChwg1j1zu3gChtiRBL + M3uKnkWyQn6HNOqeafrKI8cUQescYT6HFOYcZZjNl99DuabrtFbjU4d0nT3WkdpwyzbG1itlVOTbsO1i + OUmOAt412EQgmJ+TRD7/aLo+YgMHq5G2ezHmrrGHDeXsb175Jpi40Q0R28diztYgTK/xJxDePEEwEOMq + XDB24yB4LTdv0fyp2vUGi96wzrtAOBYiJxJkjvGGsEvWumacLuvafhExSnfHTJ1a22g9OGTqIPNwJA5/ + VUvOQUXL5ukig3k46xtyeXeNuWrMZZ63KxThW0bRvdlIRNOV0vzdoXQzMwPJ+8LpfyF0zTSBBxBO/59W + PRxhVT4IqfbCxApXeFA3dqQbUIcM3UMWy+Q6vCtj++RY6x6YpP3QJnvATEL6wbtawULuE2mq4gYLGi5s + E7UxtswZtVcriKGq+T5RcWYNypjvf3opCulqKf9UFtaezMSaE2m8YbuEXS/RHRkTiAv2TEMCAYit/Q/m + bh/Pg5i9JRCzto5CMEXAOVkEqzj1ZmmGciZ97t8OTBxx+FJa2LczDUicJhtIUXo4MEEPLjkCJO4P4Qua + SRAmAlkkmPRYRwDWnMzgZ9i2+zH+IoMu+VIoAgwEcxmYdZiwZwqid47HqAIHOC8QwS5J6w9xRs9OG2ed + umZtD44GeyWKxg7rBGat68MhSxuhNb4o/O9i7L5WRXRbQqNpHkrrl1M0cniTdjVFYvmxZCw9wu4A4pF9 + OBapB2Zi6mZ/+K2ywqAUIZyTKGUyda9Syup15jBfd+KuLG6/VieJC5AmK9bbLtCAU4oBnBbpwX2lEEHr + nTB7WyBd4E1H9pFopNBFXuLeEETvonTZHohpm/0QUEDqNUuIQamGYK+XJCvUm2T2Cu6sWfxWI/unANpe + R+lnZpmuVOSUqfvUjW41B2eI4ZJhiMGLaGLKoSe79VloyP/dJd0IrpnGYOutMpUK6fWm/2YPXU6hjj6M + uL8P3ex4Uk3Fk66v8F4lOU/PY3ruUmduJDNgN/290HeNNI2usCax9f9m422v/X+9H9H2GgswbAAAAABJ + RU5ErkJggg== + + + + 17, 17 + + + 114, 17 + + \ No newline at end of file diff --git a/src/Forms/QuickOutlineForm.Designer.cs b/src/Forms/QuickOutlineForm.Designer.cs new 
// File: src/Forms/QuickOutlineForm.Designer.cs
namespace FindFilesPlugin
{
    /// <summary>
    /// Designer-generated half of the QuickOutlineForm partial class: declares the
    /// search box and tree view and builds their layout in <see cref="InitializeComponent"/>.
    /// </summary>
    partial class QuickOutlineForm
    {
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing && (components != null))
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.textBox = new System.Windows.Forms.TextBox();
            this.tree = new System.Windows.Forms.TreeView();
            this.SuspendLayout();
            // 
            // textBox
            // 
            this.textBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
                        | System.Windows.Forms.AnchorStyles.Right)));
            this.textBox.BackColor = System.Drawing.Color.OldLace;
            this.textBox.Font = new System.Drawing.Font("Courier New", 9.75F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(204)));
            this.textBox.Location = new System.Drawing.Point(13, 8);
            this.textBox.Name = "textBox";
            this.textBox.Size = new System.Drawing.Size(305, 22);
            this.textBox.TabIndex = 0;
            this.textBox.TextChanged += new System.EventHandler(this.textBox1_TextChanged);
            this.textBox.KeyDown += new System.Windows.Forms.KeyEventHandler(this.textBox1_KeyDown);
            // 
            // tree
            // 
            this.tree.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
                        | System.Windows.Forms.AnchorStyles.Left)
                        | System.Windows.Forms.AnchorStyles.Right)));
            this.tree.BackColor = System.Drawing.Color.OldLace;
            this.tree.BorderStyle = System.Windows.Forms.BorderStyle.None;
            this.tree.DrawMode = System.Windows.Forms.TreeViewDrawMode.OwnerDrawText;
            this.tree.Font = new System.Drawing.Font("Microsoft Sans Serif", 9F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(204)));
            this.tree.HideSelection = false;
            this.tree.Location = new System.Drawing.Point(13, 40);
            this.tree.Name = "tree";
            this.tree.ShowLines = false;
            this.tree.ShowPlusMinus = false;
            this.tree.ShowRootLines = false;
            this.tree.Size = new System.Drawing.Size(305, 173);
            this.tree.TabIndex = 1;
            this.tree.NodeMouseDoubleClick += new System.Windows.Forms.TreeNodeMouseClickEventHandler(this.tree_NodeMouseDoubleClick);
            this.tree.DrawNode += new System.Windows.Forms.DrawTreeNodeEventHandler(this.tree_DrawNode);
            // 
            // QuickOutlineForm
            // 
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.BackColor = System.Drawing.Color.OldLace;
            this.ClientSize = new System.Drawing.Size(330, 225);
            this.Controls.Add(this.tree);
            this.Controls.Add(this.textBox);
            this.KeyPreview = true;
            this.MaximizeBox = false;
            this.MinimizeBox = false;
            this.MinimumSize = new System.Drawing.Size(320, 200);
            this.Name = "QuickOutlineForm";
            this.ShowIcon = false;
            this.ShowInTaskbar = false;
            this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
            this.Text = "Quick Outline";
            this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.QuickOutlineForm_FormClosing);
            this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.QuickOutlineForm_KeyDown);
            this.ResumeLayout(false);
            this.PerformLayout();

        }

        #endregion

        // Search input; wired to textBox1_TextChanged and textBox1_KeyDown.
        private System.Windows.Forms.TextBox textBox;
        // Owner-drawn (OwnerDrawText) tree; wired to tree_DrawNode and tree_NodeMouseDoubleClick.
        private System.Windows.Forms.TreeView tree;
    }
}
mode 100644 index 0000000..c6ad04b --- /dev/null +++ b/src/Forms/QuickOutlineForm.cs @@ -0,0 +1,260 @@ +using System; +using System.Collections.Generic; +using System.ComponentModel; +using System.Data; +using System.Drawing; +using System.IO; +using System.Text; +using System.Windows.Forms; +using ASCompletion.Context; +using ASCompletion.Model; +using PluginCore; + +namespace FindFilesPlugin +{ + public partial class QuickOutlineForm : Form + { + public const int ICON_FILE = 0; + public const int ICON_FOLDER_CLOSED = 1; + public const int ICON_FOLDER_OPEN = 2; + public const int ICON_CHECK_SYNTAX = 3; + public const int ICON_QUICK_BUILD = 4; + public const int ICON_PACKAGE = 5; + public const int ICON_INTERFACE = 6; + public const int ICON_INTRINSIC_TYPE = 7; + public const int ICON_TYPE = 8; + public const int ICON_VAR = 9; + public const int ICON_PROTECTED_VAR = 10; + public const int ICON_PRIVATE_VAR = 11; + public const int ICON_CONST = 12; + public const int ICON_PROTECTED_CONST = 13; + public const int ICON_PRIVATE_CONST = 14; + public const int ICON_FUNCTION = 15; + public const int ICON_PROTECTED_FUNCTION = 16; + public const int ICON_PRIVATE_FUNCTION = 17; + public const int ICON_PROPERTY = 18; + public const int ICON_PROTECTED_PROPERTY = 19; + public const int ICON_PRIVATE_PROPERTY = 20; + public const int ICON_TEMPLATE = 21; + public const int ICON_DECLARATION = 22; + + private PluginMain plugin; + + public QuickOutlineForm(PluginMain plugin) + { + this.plugin = plugin; + InitializeComponent(); + + if ((plugin.Settings as Settings).OutlineFormSize.Width > MinimumSize.Width) + Size = (plugin.Settings as Settings).OutlineFormSize; + + InitTree(); + RefreshTree(); + } + + private void InitTree() + { + //IContainer components; + ComponentResourceManager resources = new ComponentResourceManager(typeof(ASCompletion.PluginUI)); + ImageList treeIcons = new ImageList(); + treeIcons.ImageStream = 
((ImageListStreamer)(resources.GetObject("treeIcons.ImageStream"))); + treeIcons.TransparentColor = Color.Transparent; + treeIcons.Images.SetKeyName(0, "FilePlain.png"); + treeIcons.Images.SetKeyName(1, "FolderClosed.png"); + treeIcons.Images.SetKeyName(2, "FolderOpen.png"); + treeIcons.Images.SetKeyName(3, "CheckAS.png"); + treeIcons.Images.SetKeyName(4, "QuickBuild.png"); + treeIcons.Images.SetKeyName(5, "Package.png"); + treeIcons.Images.SetKeyName(6, "Interface.png"); + treeIcons.Images.SetKeyName(7, "Intrinsic.png"); + treeIcons.Images.SetKeyName(8, "Class.png"); + treeIcons.Images.SetKeyName(9, "Variable.png"); + treeIcons.Images.SetKeyName(10, "VariableProtected.png"); + treeIcons.Images.SetKeyName(11, "VariablePrivate.png"); + treeIcons.Images.SetKeyName(12, "Const.png"); + treeIcons.Images.SetKeyName(13, "ConstProtected.png"); + treeIcons.Images.SetKeyName(14, "ConstPrivate.png"); + treeIcons.Images.SetKeyName(15, "Method.png"); + treeIcons.Images.SetKeyName(16, "MethodProtected.png"); + treeIcons.Images.SetKeyName(17, "MethodPrivate.png"); + treeIcons.Images.SetKeyName(18, "Property.png"); + treeIcons.Images.SetKeyName(19, "PropertyProtected.png"); + treeIcons.Images.SetKeyName(20, "PropertyPrivate.png"); + treeIcons.Images.SetKeyName(21, "Template.png"); + treeIcons.Images.SetKeyName(22, "Declaration.png"); + + tree.ImageList = treeIcons; + } + + private void tree_DrawNode(object sender, DrawTreeNodeEventArgs e) + { + Rectangle fillBounds = new Rectangle(e.Node.Bounds.Location, e.Node.Bounds.Size); + fillBounds.X -= 1; + fillBounds.Width += 10; + + Rectangle textBounds = new Rectangle(e.Node.Bounds.Location, e.Node.Bounds.Size); + textBounds.X += 2; + textBounds.Width += 10; + + if ((e.State & TreeNodeStates.Selected) != 0) + { + e.Graphics.FillRectangle(Brushes.SaddleBrown, fillBounds); + e.Graphics.DrawString(e.Node.Text, tree.Font, Brushes.White, textBounds); + using (Pen focusPen = new Pen(Color.Gray)) + { + focusPen.DashStyle = 
System.Drawing.Drawing2D.DashStyle.Dot; + Rectangle focusBounds = fillBounds; + focusBounds.Size = new Size(focusBounds.Width - 1, + focusBounds.Height - 1); + e.Graphics.DrawRectangle(focusPen, focusBounds); + } + } + else + { + e.Graphics.FillRectangle(new SolidBrush(tree.BackColor), fillBounds); + e.Graphics.DrawString(e.Node.Text, tree.Font, Brushes.Black, textBounds); + } + + } + + private void Navigate() + { + if (tree.SelectedNode != null) + { + ASContext.Context.OnSelectOutlineNode(tree.SelectedNode); + Close(); + } + } + + private void RefreshTree() + { + tree.BeginUpdate(); + tree.Nodes.Clear(); + FillTree(); + tree.EndUpdate(); + } + + private void FillTree() + { + FileModel model = ASContext.Context.CurrentModel; + if (model == FileModel.Ignore) + return; + + // members + if (model.Members.Count > 0) + { + AddMembers(tree.Nodes, model.Members); + } + // classes + foreach (ClassModel classModel in model.Classes) + { + int imageNum = ((classModel.Flags & FlagType.Intrinsic) > 0) ? ICON_INTRINSIC_TYPE : + ((classModel.Flags & FlagType.Interface) > 0) ? ICON_INTERFACE : ICON_TYPE; + TreeNode node = new TreeNode(classModel.Name, imageNum, imageNum); + node.Tag = "class"; + tree.Nodes.Add(node); + AddMembers(node.Nodes, classModel.Members); + node.Expand(); + } + } + + private void AddMembers(TreeNodeCollection nodes, MemberList members) + { + String searchedText = textBox.Text.ToLower().Trim(); + foreach (MemberModel member in members) + { + String memberText = member.ToString().ToLower(); + if (searchedText.Length > 0 && !memberText.StartsWith(searchedText)) + continue; + + MemberTreeNode node = null; + int imageIndex; + if ((member.Flags & FlagType.Constant) > 0) + { + imageIndex = ((member.Access & Visibility.Private) > 0) ? ICON_PRIVATE_CONST : + ((member.Access & Visibility.Protected) > 0) ? 
ICON_PROTECTED_CONST : ICON_CONST; + node = new MemberTreeNode(member, imageIndex); + nodes.Add(node); + } + else if ((member.Flags & FlagType.Variable) > 0) + { + imageIndex = ((member.Access & Visibility.Private) > 0) ? ICON_PRIVATE_VAR : + ((member.Access & Visibility.Protected) > 0) ? ICON_PROTECTED_VAR : ICON_VAR; + node = new MemberTreeNode(member, imageIndex); + nodes.Add(node); + } + else if ((member.Flags & (FlagType.Getter | FlagType.Setter)) > 0) + { + if (node != null && node.Text == member.ToString()) // "collapse" properties + continue; + imageIndex = ((member.Access & Visibility.Private) > 0) ? ICON_PRIVATE_PROPERTY : + ((member.Access & Visibility.Protected) > 0) ? ICON_PROTECTED_PROPERTY : ICON_PROPERTY; + node = new MemberTreeNode(member, imageIndex); + nodes.Add(node); + } + else if ((member.Flags & FlagType.Function) > 0) + { + imageIndex = ((member.Access & Visibility.Private) > 0) ? ICON_PRIVATE_FUNCTION : + ((member.Access & Visibility.Protected) > 0) ? ICON_PROTECTED_FUNCTION : ICON_FUNCTION; + node = new MemberTreeNode(member, imageIndex); + nodes.Add(node); + } + if (tree.SelectedNode == null) + tree.SelectedNode = node; + } + } + + private void QuickOutlineForm_KeyDown(object sender, KeyEventArgs e) + { + if (e.KeyCode == Keys.Escape) + { + Close(); + } + else if (e.KeyCode == Keys.Enter) + { + e.Handled = true; + Navigate(); + } + } + + private void QuickOutlineForm_FormClosing(object sender, FormClosingEventArgs e) + { + (plugin.Settings as Settings).OutlineFormSize = Size; + } + + private void textBox1_TextChanged(object sender, EventArgs e) + { + RefreshTree(); + } + + private void textBox1_KeyDown(object sender, KeyEventArgs e) + { + if (e.KeyCode == Keys.Down && tree.SelectedNode != null && tree.SelectedNode.NextVisibleNode != null) + { + tree.SelectedNode = tree.SelectedNode.NextVisibleNode; + e.Handled = true; + } + else if (e.KeyCode == Keys.Up && tree.SelectedNode != null && tree.SelectedNode.PrevVisibleNode != null) + { + 
tree.SelectedNode = tree.SelectedNode.PrevVisibleNode; + e.Handled = true; + } + } + + private void tree_NodeMouseDoubleClick(object sender, TreeNodeMouseClickEventArgs e) + { + Navigate(); + } + + } +} + +class MemberTreeNode : TreeNode +{ + public MemberTreeNode(MemberModel member, int imageIndex) + : base(member.ToString(), imageIndex, imageIndex) + { + Tag = member.Name + "@" + member.LineFrom; + } +} + diff --git a/src/Forms/QuickOutlineForm.resx b/src/Forms/QuickOutlineForm.resx new file mode 100644 index 0000000..19dc0dd --- /dev/null +++ b/src/Forms/QuickOutlineForm.resx @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/src/PluginMain.cs b/src/PluginMain.cs new file mode 100644 index 0000000..07f9685 --- /dev/null +++ b/src/PluginMain.cs @@ -0,0 +1,282 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Windows.Forms; +using System.ComponentModel; +using PluginCore.Utilities; +using PluginCore.Managers; +using PluginCore.Helpers; +using PluginCore; + +namespace QuickNavigatePlugin +{ + public class PluginMain : IPlugin + { + private const String PLUGIN_NAME = "QuickNavigate"; + private const String PLUGIN_GUID = "ac04a177-f578-47d7-87f1-0cbc0f834446"; + private const String PLUGIN_HELP = "www.flashdevelop.org/community/"; + private const String PLUGIN_AUTH = "Canab & CrazySam"; + private const String SETTINGS_FILE = "Settings.fdb"; + private const String PLUGIN_DESC = "QuickNavigate plugin"; + + private String settingFilename; + private Settings settingObject; + private ControlClickManager controlClickManager; + + private 
ToolStripMenuItem findFilesMenuItem = null; + private ToolStripMenuItem quickOutlineMenuItem = null; + + #region Required Properties + + /// + /// Name of the plugin + /// + public String Name + { + get { return PLUGIN_NAME; } + } + + /// + /// GUID of the plugin + /// + public String Guid + { + get { return PLUGIN_GUID; } + } + + /// + /// Author of the plugin + /// + public String Author + { + get { return PLUGIN_AUTH; } + } + + /// + /// Description of the plugin + /// + public String Description + { + get { return PLUGIN_DESC; } + } + + /// + /// Web address for help + /// + public String Help + { + get { return PLUGIN_HELP; } + } + + /// + /// Object that contains the settings + /// + [Browsable(false)] + public Object Settings + { + get { return settingObject; } + } + + #endregion + + #region Required Methods + + /// + /// Initializes the plugin + /// + public void Initialize() + { + InitBasics(); + LoadSettings(); + AddEventHandlers(); + CreateMenuItems(); + + if (settingObject.CtrlClickEnabled) + controlClickManager = new ControlClickManager(); + } + + /// + /// Disposes the plugin + /// + public void Dispose() + { + SaveSettings(); + } + + /// + /// Handles the incoming events + /// + public void HandleEvent(Object sender, NotifyEvent e, HandlingPriority prority) + { + if (e.Type == EventType.FileSwitch) + { + if (controlClickManager != null) + controlClickManager.SciControl = PluginBase.MainForm.CurrentDocument.SciControl; + } + else if (e.Type == EventType.ApplySettings) + { + if (!PluginBase.MainForm.IgnoredKeys.Contains(settingObject.OpenResourceShortcut)) + PluginBase.MainForm.IgnoredKeys.Add(settingObject.OpenResourceShortcut); + + if (!PluginBase.MainForm.IgnoredKeys.Contains(settingObject.QuickOutlineShortcut)) + PluginBase.MainForm.IgnoredKeys.Add(settingObject.QuickOutlineShortcut); + + findFilesMenuItem.ShortcutKeys = settingObject.OpenResourceShortcut; + quickOutlineMenuItem.ShortcutKeys = settingObject.QuickOutlineShortcut; + } + } + + 
#endregion + + #region Custom Methods + + /// + /// Initializes important variables + /// + public void InitBasics() + { + String dataPath = Path.Combine(PathHelper.DataDir, PLUGIN_NAME); + if (!Directory.Exists(dataPath)) + Directory.CreateDirectory(dataPath); + settingFilename = Path.Combine(dataPath, SETTINGS_FILE); + } + + public void AddEventHandlers() + { + EventManager.AddEventHandler(this, EventType.FileSwitch); + EventManager.AddEventHandler(this, EventType.ApplySettings); + } + + public void CreateMenuItems() + { + ToolStripMenuItem menu = (ToolStripMenuItem)PluginBase.MainForm.FindMenuItem("ViewMenu"); + findFilesMenuItem = new ToolStripMenuItem("Find Files", + PluginBase.MainForm.FindImage("209"), + new EventHandler(ShowResourceForm), + settingObject.OpenResourceShortcut); + + quickOutlineMenuItem = new ToolStripMenuItem("Quick Outline", + PluginBase.MainForm.FindImage("315|16|0|0"), + new EventHandler(ShowOutlineForm), + settingObject.QuickOutlineShortcut); + + + menu.DropDownItems.Add(findFilesMenuItem); + menu.DropDownItems.Add(quickOutlineMenuItem); + + PluginBase.MainForm.IgnoredKeys.Add(settingObject.OpenResourceShortcut); + PluginBase.MainForm.IgnoredKeys.Add(settingObject.QuickOutlineShortcut); + } + + private void ShowResourceForm(object sender, EventArgs e) + { + new FindFilesForm(this).ShowDialog(); + } + + private void ShowOutlineForm(object sender, EventArgs e) + { + new QuickOutlineForm(this).ShowDialog(); + } + + public void LoadSettings() + { + if (File.Exists(settingFilename)) + { + try + { + settingObject = new Settings(); + settingObject = (Settings) ObjectSerializer.Deserialize(settingFilename, settingObject); + } + catch + { + settingObject = new Settings(); + SaveSettings(); + } + } + else + { + settingObject = new Settings(); + SaveSettings(); + } + } + + public void SaveSettings() + { + ObjectSerializer.Serialize(settingFilename, settingObject); + } + + public List GetProjectFiles() + { + List folders = GetProjectFolders(); + 
List files = new List(); + foreach (String folder in folders) + { + try + { + string[] filters = settingObject.SearchFilter.Split(','); + foreach (string filter in filters) + { + files.AddRange(Directory.GetFiles(folder, filter, SearchOption.AllDirectories)); + } + } + catch(ArgumentException) + { + MessageBox.Show("Search Filter : \"" + settingObject.SearchFilter + "\" is invalid. Visit http://msdn.microsoft.com/en-us/library/ms143316.aspx for more information.", "Invalid Search Filter", MessageBoxButtons.OK); + settingObject.SearchFilter = settingObject.DefaultSearchFilter; + return new List(); + } + } + return files; + } + + public List GetProjectFolders() + { + List folders = new List(); + // Lots of things can go wrong here. Missing Permissions for example. + try + { + // Check if we have a project open. + if (PluginBase.CurrentProject != null) + { + String projectFolder = Path.GetDirectoryName(PluginBase.CurrentProject.ProjectPath); + folders.Add(projectFolder); + foreach (String path in PluginBase.CurrentProject.SourcePaths) + { + if (Path.IsPathRooted(path)) + { + folders.Add(path); + } + else + { + String folder = Path.GetFullPath(Path.Combine(projectFolder, path)); + if (!folder.StartsWith(projectFolder)) + folders.Add(folder); + } + } + } + + // A directory was specified, lets get the underlying folders there! 
+ if (settingObject.SearchDirectory != settingObject.DefaultSearchDirectory) + { + if (!Directory.Exists(settingObject.SearchDirectory)) + { + MessageBox.Show("Path: \"" + settingObject.SearchDirectory + "\" not found.", "Directory Not Found", MessageBoxButtons.OK); + settingObject.SearchDirectory = settingObject.DefaultSearchDirectory; + return folders; + } + folders.Add(settingObject.SearchDirectory); + } + } + catch (System.Exception ex) + { + MessageBox.Show(ex.Message, "Exception!", MessageBoxButtons.OK); + settingObject.SearchDirectory = settingObject.DefaultSearchDirectory; + Dispose(); + } + + return folders; + } + #endregion + + } +} diff --git a/src/Properties/AssemblyInfo.cs b/src/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..09aa20e --- /dev/null +++ b/src/Properties/AssemblyInfo.cs @@ -0,0 +1,33 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("FindFilesPlugin")] +[assembly: AssemblyDescription("Plugin for Quick File Search and Navigation for Flash Develop")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("FindFilesPlugin.Properties")] +[assembly: AssemblyCopyright("Copyright © 2010")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. 
+[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("ac04a177-f578-47d7-87f1-0cbc0f834446")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/src/Properties/Resources.Designer.cs b/src/Properties/Resources.Designer.cs new file mode 100644 index 0000000..c99fbf0 --- /dev/null +++ b/src/Properties/Resources.Designer.cs @@ -0,0 +1,63 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Runtime Version:4.0.30319.42000 +// +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +//------------------------------------------------------------------------------ + +namespace FindFilesPlugin.Properties { + using System; + + + /// + /// A strongly-typed resource class, for looking up localized strings, etc. + /// + // This class was auto-generated by the StronglyTypedResourceBuilder + // class via a tool like ResGen or Visual Studio. + // To add or remove a member, edit your .ResX file then rerun ResGen + // with the /str option, or rebuild your VS project. 
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + internal class Resources { + + private static global::System.Resources.ResourceManager resourceMan; + + private static global::System.Globalization.CultureInfo resourceCulture; + + [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] + internal Resources() { + } + + /// + /// Returns the cached ResourceManager instance used by this class. + /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Resources.ResourceManager ResourceManager { + get { + if (object.ReferenceEquals(resourceMan, null)) { + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("FindFilesPlugin.Properties.Resources", typeof(Resources).Assembly); + resourceMan = temp; + } + return resourceMan; + } + } + + /// + /// Overrides the current thread's CurrentUICulture property for all + /// resource lookups using this strongly typed resource class. 
+ /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Globalization.CultureInfo Culture { + get { + return resourceCulture; + } + set { + resourceCulture = value; + } + } + } +} diff --git a/src/Properties/Resources.resx b/src/Properties/Resources.resx new file mode 100644 index 0000000..7080a7d --- /dev/null +++ b/src/Properties/Resources.resx @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/src/SimMetrics/BaseClasses/Abstract/AbstractAffineGapCost.cs b/src/SimMetrics/BaseClasses/Abstract/AbstractAffineGapCost.cs new file mode 100644 index 0000000..49c0aa9 --- /dev/null +++ b/src/SimMetrics/BaseClasses/Abstract/AbstractAffineGapCost.cs @@ -0,0 +1,87 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is notclear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. 
All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsApi { + using System; + + /// + /// abstract class used as a base for all affine gap classes + /// + [Serializable] + abstract public class AbstractAffineGapCost : IAffineGapCost { + /// + /// get cost between characters. + /// + /// the string to get the cost of a gap + /// the index within the string to test a start gap from + /// the index within the string to test a end gap to + /// the cost of a Gap G + abstract public double GetCost(string textToGap, int stringIndexStartGap, int stringIndexEndGap); + + /// + /// returns the maximum possible cost. 
+ /// + abstract public double MaxCost { get; } + + /// + /// returns the minimum possible cost. + /// + abstract public double MinCost { get; } + + /// + /// returns the name of the cost function. + /// + abstract public string ShortDescriptionString { get; } + } +} \ No newline at end of file diff --git a/src/SimMetrics/BaseClasses/Abstract/AbstractStringMetric.cs b/src/SimMetrics/BaseClasses/Abstract/AbstractStringMetric.cs new file mode 100644 index 0000000..3ba99ab --- /dev/null +++ b/src/SimMetrics/BaseClasses/Abstract/AbstractStringMetric.cs @@ -0,0 +1,162 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is notclear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. 
Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsApi { + using System; + + /// + /// base class which all metrics inherit from. + /// + /// This class implemented a few basic methods and then leaves the others to + /// be implemented by the similarity metric itself. + [Serializable] + abstract public class AbstractStringMetric : IStringMetric { + /// + /// does a batch comparison of the set of strings with the given + /// comparator string returning an array of results equal in length + /// to the size of the given set of strings to test. + /// + /// an array of strings to test against the comparator string + /// the comparator string to test the array against + /// an array of results equal in length to the size of the given set of strings to test. 
+ public double[] BatchCompareSet(string[] setRenamed, string comparator) { + if ((setRenamed != null) && (comparator != null)) { + double[] results = new double[setRenamed.Length]; + for (int strNum = 0; strNum < setRenamed.Length; strNum++) { + results[strNum] = GetSimilarity(setRenamed[strNum], comparator); + } + return results; + } + return null; + } + + /// + /// does a batch comparison of one set of strings against another set + /// of strings returning an array of results equal in length + /// to the minimum size of the given sets of strings to test. + /// + /// an array of strings to test + /// an array of strings to test the first array against + /// an array of results equal in length to the minimum size of the given sets of strings to test. + public double[] BatchCompareSets(string[] firstSet, string[] secondSet) { + if ((firstSet != null) && (secondSet != null)) { + double[] results; + if (firstSet.Length <= secondSet.Length) { + results = new double[firstSet.Length]; + } + else { + results = new double[secondSet.Length]; + } + for (int strNum = 0; strNum < results.Length; strNum++) { + results[strNum] = GetSimilarity(firstSet[strNum], secondSet[strNum]); + } + return results; + } + return null; + } + + /// + /// gets the similarity measure of the metric for the given strings. + /// + /// + /// + /// implemented version will return score between 0 and 1 + abstract public double GetSimilarity(string firstWord, string secondWord); + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + abstract public string GetSimilarityExplained(string firstWord, string secondWord); + + /// + /// gets the actual time in milliseconds it takes to perform a similarity timing. + /// This call takes as long as the similarity metric to perform so should not be done in normal circumstances. 
+ /// + /// + /// + /// the actual time in milliseconds taken to perform the similarity measure + public long GetSimilarityTimingActual(string firstWord, string secondWord) { + long timeBefore = (DateTime.Now.Ticks - 621355968000000000) / 10000; + GetSimilarity(firstWord, secondWord); + long timeAfter = (DateTime.Now.Ticks - 621355968000000000) / 10000; + return timeAfter - timeBefore; + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + abstract public double GetSimilarityTimingEstimated(string firstWord, string secondWord); + + /// + /// gets the un-normalised similarity measure of the metric for the given strings. + /// + /// + /// returns the score of the similarity measure (un-normalised) + abstract public double GetUnnormalisedSimilarity(string firstWord, string secondWord); + + /// + /// reports the metric type. + /// + abstract public string LongDescriptionString { get; } + + /// + /// reports the metric type. + /// + abstract public string ShortDescriptionString { get; } + } +} \ No newline at end of file diff --git a/src/SimMetrics/BaseClasses/Abstract/AbstractSubstitutionCost.cs b/src/SimMetrics/BaseClasses/Abstract/AbstractSubstitutionCost.cs new file mode 100644 index 0000000..f1fbf5b --- /dev/null +++ b/src/SimMetrics/BaseClasses/Abstract/AbstractSubstitutionCost.cs @@ -0,0 +1,88 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is notclear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. 
+ * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsApi { + using System; + + /// + /// AbstractSubstitutionCost implements a abstract class for substiution costs + /// + [Serializable] + abstract public class AbstractSubstitutionCost : ISubstitutionCost { + /// + /// get cost between characters. 
+ /// + /// the firstWord to evaluate the cost + /// the index within the firstWord to test + /// the secondWord to evaluate the cost + /// the index within the string2 to test + /// + abstract public double GetCost(string firstWord, int firstWordIndex, string secondWord, int secondWordIndex); + + /// + /// returns the maximum possible cost. + /// + abstract public double MaxCost { get; } + + /// + /// returns the minimum possible cost. + /// + abstract public double MinCost { get; } + + /// + /// returns the name of the cost function. + /// + abstract public string ShortDescriptionString { get; } + } +} \ No newline at end of file diff --git a/src/SimMetrics/BaseClasses/Abstract/AbstractTokeniserQGramN.cs b/src/SimMetrics/BaseClasses/Abstract/AbstractTokeniserQGramN.cs new file mode 100644 index 0000000..6602622 --- /dev/null +++ b/src/SimMetrics/BaseClasses/Abstract/AbstractTokeniserQGramN.cs @@ -0,0 +1,161 @@ +#region Copyright +/* This new class in the .NET version holds abstract tokeniser class. + * + * (c) Chris Parkinson 2006. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsApi { + using System; + using System.Collections.ObjectModel; + using System.Text; + using SimMetricsUtilities; + + /// + /// Implements a QGram Tokeniser to cope with all gram sizes. + /// + /// The cci value determines at what level the skip characters + /// are gathered. This is a variation of the normal QGram analysis when + /// character pairs are created having skipped characters in the words. + [Serializable] + public abstract class AbstractTokeniserQGramN : ITokeniser { + const string defaultEndPadCharacter = "#"; + const string defaultStartPadCharacter = "?"; + int characterCombinationIndex; + int qGramLength; + ITermHandler stopWordHandler; + string suppliedWord; + TokeniserUtilities tokenUtilities; + + /// + /// Return tokenized version of a string. + /// + /// input + /// tokenized version of a string + public abstract Collection Tokenize(string word); + + /// + /// full version of Tokenise which allows for different token lengths + /// as well as the characterCombinationIndexValue error level as well. 
+ /// + /// word to tokenise + /// whether to generate extended tokens + /// length of tokens + /// error level for skip tokens + /// collection of tokens + public Collection Tokenize(string word, bool extended, int tokenLength, int characterCombinationIndexValue) { + if (!String.IsNullOrEmpty(word)) { + SuppliedWord = word; + Collection anArray = new Collection(); + int wordLength = word.Length; + int maxValue = 0; + if (tokenLength > 0) { + maxValue = (tokenLength - 1); + } + StringBuilder testword = new StringBuilder(wordLength + (2 * maxValue)); + if (extended) { + testword.Insert(0, defaultStartPadCharacter, maxValue); + } + testword.Append(word); + if (extended) { + testword.Insert(testword.Length, defaultEndPadCharacter, maxValue); + } + + // normal n-gram keys characterCombinationIndex = 0 + string testWordOne = testword.ToString(); + int maxLoop; + if (extended) { + maxLoop = wordLength + maxValue; + } + else { + maxLoop = wordLength - tokenLength + 1; + } + for (int i = 0; i < maxLoop; i++) { + string testWord = testWordOne.Substring(i, tokenLength); + if (!stopWordHandler.IsWord(testWord)) { + anArray.Add(testWord); + } + } + + if (characterCombinationIndexValue != 0) { + // special characterCombinationIndex n-gram keys + testWordOne = testword.ToString(); + maxLoop -= 1; // have to reduce by 1 as we are skipping a letter + for (int i = 0; i < maxLoop; i++) { + string testWord = testWordOne.Substring(i, maxValue) + testWordOne.Substring(i + tokenLength, 1); + if (!stopWordHandler.IsWord(testWord)) { + if (!anArray.Contains(testWord)) { + anArray.Add(testWord); + } + } + } + } + return anArray; + } + return null; + } + + /// + /// Return tokenized set of a string. 
+ /// + /// input + /// tokenized version of a string as a set + public Collection TokenizeToSet(string word) { + if (!String.IsNullOrEmpty(word)) { + SuppliedWord = word; + return TokenUtilities.CreateSet(Tokenize(word)); + } + return null; + } + + /// + /// CCI - error level used for the sgram analysis. + /// + /// A value of 1 means the sgram will skip a letter when generating the tokens + public int CharacterCombinationIndex { get { return characterCombinationIndex; } set { characterCombinationIndex = value; } } + + /// + /// displays the delimiters used - ie none. + /// + public string Delimiters { get { return String.Empty; } } + + /// + /// length of the qgram tokens to create + /// + public int QGramLength { get { return qGramLength; } set { qGramLength = value; } } + + /// + /// displays the tokenisation method. + /// + public abstract string ShortDescriptionString { get; } + + /// + /// the stop word handler used. + /// + public ITermHandler StopWordHandler { get { return stopWordHandler; } set { stopWordHandler = value; } } + + /// + /// supplied word + /// + public string SuppliedWord { get { return suppliedWord; } set { suppliedWord = value; } } + + /// + /// class containing token utilities + /// + public TokeniserUtilities TokenUtilities { get { return tokenUtilities; } set { tokenUtilities = value; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/BaseClasses/Interfaces/IAffineGapCost.cs b/src/SimMetrics/BaseClasses/Interfaces/IAffineGapCost.cs new file mode 100644 index 0000000..b02cc32 --- /dev/null +++ b/src/SimMetrics/BaseClasses/Interfaces/IAffineGapCost.cs @@ -0,0 +1,84 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is notclear who made the initial convertion to .NET. 
+ * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsApi { + /// + /// defines an Interface for AffineGapCost functions to be interchanged + /// + public interface IAffineGapCost { + /// + /// get cost between characters. + /// + /// the string to get the cost of a gap + /// the index within the string to test a start gap from + /// the index within the string to test a end gap to + /// + double GetCost(string textToGap, int stringIndexStartGap, int stringIndexEndGap); + + /// + /// returns the maximum possible cost. + /// + double MaxCost { get; } + + /// + /// returns the minimum possible cost. + /// + double MinCost { get; } + + /// + /// returns the name of the affine gap cost function. + /// + string ShortDescriptionString { get; } + } +} \ No newline at end of file diff --git a/src/SimMetrics/BaseClasses/Interfaces/IStringMetric.cs b/src/SimMetrics/BaseClasses/Interfaces/IStringMetric.cs new file mode 100644 index 0000000..a84a8a8 --- /dev/null +++ b/src/SimMetrics/BaseClasses/Interfaces/IStringMetric.cs @@ -0,0 +1,115 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is notclear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. 
Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsApi { + /// + /// implements an interface for the string metrics + /// + public interface IStringMetric { + /// + /// returns a similarity measure of the string comparison. + /// + /// + /// + /// a double between zero to one (zero = no similarity, one = matching strings) + double GetSimilarity(string firstWord, string secondWord); + + /// gets a div class xhtml similarity explaining the operation of the metric. 
+ /// + /// + /// string 1 + /// + /// string 2 + /// + /// + /// a div class html section detailing the metric operation. + /// + string GetSimilarityExplained(string firstWord, string secondWord); + + /// + /// gets the actual time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the actual time in milliseconds taken to perform the similarity measure + /// This call takes as long as the similarity metric to perform so should not be done in normal circumstances. + long GetSimilarityTimingActual(string firstWord, string secondWord); + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + double GetSimilarityTimingEstimated(string firstWord, string secondWord); + + /// + /// gets the un-normalised similarity measure of the metric for the given strings. + /// + /// + /// + /// returns the score of the similarity measure (un-normalised) + double GetUnnormalisedSimilarity(string firstWord, string secondWord); + + /// + /// returns a long string of the string metric description. + /// + string LongDescriptionString { get; } + + /// + /// returns a string of the string metric name. + /// + string ShortDescriptionString { get; } + } +} \ No newline at end of file diff --git a/src/SimMetrics/BaseClasses/Interfaces/ISubstitutionCost.cs b/src/SimMetrics/BaseClasses/Interfaces/ISubstitutionCost.cs new file mode 100644 index 0000000..03710a7 --- /dev/null +++ b/src/SimMetrics/BaseClasses/Interfaces/ISubstitutionCost.cs @@ -0,0 +1,85 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is notclear who made the initial convertion to .NET.
+ * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsApi { + /// + /// is an interface for a cost function d(i,j). + /// + public interface ISubstitutionCost { + /// + /// get cost between characters. + /// + /// the firstWord to evaluate the cost + /// the index within the firstWord to test + /// the secondWord to evaluate the cost + /// the index within the secondWord to test + /// + double GetCost(string firstWord, int firstWordIndex, string secondWord, int secondWordIndex); + + /// + /// returns the maximum possible cost. + /// + double MaxCost { get; } + + /// + /// returns the minimum possible cost. + /// + double MinCost { get; } + + /// + /// returns the name of the cost function. + /// + string ShortDescriptionString { get; } + } +} \ No newline at end of file diff --git a/src/SimMetrics/BaseClasses/Interfaces/ITermHandler.cs b/src/SimMetrics/BaseClasses/Interfaces/ITermHandler.cs new file mode 100644 index 0000000..a234636 --- /dev/null +++ b/src/SimMetrics/BaseClasses/Interfaces/ITermHandler.cs @@ -0,0 +1,96 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is notclear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. 
Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsApi { + using System.Text; + + /// + /// defines an interface for stop word handlers. + /// + public interface ITermHandler { + /// + /// adds a Word to the interface. + /// + /// termToAdd the Word to add + void AddWord(string termToAdd); + + /// + /// isStopWord determines if a given term is a word or not. + /// + /// termToTest the term to test + /// true if a stopword false otherwise. + bool IsWord(string termToTest); + + /// + /// removes the given word from the list. 
+ /// + /// termToRemove the word term to remove + void RemoveWord(string termToRemove); + + /// + /// gets the number of stopwords in the list. + /// + int NumberOfWords { get; } + + /// + /// gets the short description string of the stop word handler used. + /// + string ShortDescriptionString { get; } + + /// + /// gets the words as an output string buffer. + /// + StringBuilder WordsAsBuffer { get; } + } +} \ No newline at end of file diff --git a/src/SimMetrics/BaseClasses/Interfaces/ITokeniser.cs b/src/SimMetrics/BaseClasses/Interfaces/ITokeniser.cs new file mode 100644 index 0000000..6c565d2 --- /dev/null +++ b/src/SimMetrics/BaseClasses/Interfaces/ITokeniser.cs @@ -0,0 +1,91 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is notclear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. 
+ * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsApi { + using System.Collections.ObjectModel; + + /// + /// InterfaceTokeniser interface for a Tokeniser class. + /// + public interface ITokeniser { + /// + /// Return tokenized version of a string. + /// + /// + /// tokenized version of a string + Collection Tokenize(string word); + + /// + /// Return tokenized version of a string as a set. + /// + /// + /// tokenized version of a string as a set + Collection TokenizeToSet(string word); + + /// + /// displays the delimiters used - (if applicable). + /// + string Delimiters { get; } + + /// + /// displays the tokenisation method. + /// + string ShortDescriptionString { get; } + + /// + /// gets the stop word handler used.
+ /// + ITermHandler StopWordHandler { get; set; } + } +} \ No newline at end of file diff --git a/src/SimMetrics/SimilarityClasses/QGram/QGramsDistance.cs b/src/SimMetrics/SimilarityClasses/QGram/QGramsDistance.cs new file mode 100644 index 0000000..4737405 --- /dev/null +++ b/src/SimMetrics/SimilarityClasses/QGram/QGramsDistance.cs @@ -0,0 +1,189 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. 
Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsMetricUtilities { + using System; + using System.Collections.ObjectModel; + using SimMetricsApi; + using SimMetricsUtilities; + + /// + /// implements a QGram distance metric using supplied QGRam tokeniser + /// + [Serializable] + sealed public class QGramsDistance : AbstractStringMetric { + const double defaultMismatchScore = 0.0; + + /// + /// constructor - default (empty). + /// + public QGramsDistance() : this(new TokeniserQGram3Extended()) {} + + /// + /// the tokeniser to use; should a different tokeniser be required + /// + /// + public QGramsDistance(ITokeniser tokeniserToUse) { + tokeniser = tokeniserToUse; + tokenUtilities = new TokeniserUtilities(); + } + + /// + /// a constant for calculating the estimated timing cost. + /// + double estimatedTimingConstant = 0.000134F; + /// + /// + /// + TokeniserUtilities tokenUtilities; + /// + /// tokeniser for tokenisation of the query strings. + /// + ITokeniser tokeniser; + + /// + /// gets the similarity of the two strings using QGramsDistance. 
+ /// + /// + /// + /// a value between 0-1 of the similarity + public override double GetSimilarity(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double difference = GetUnnormalisedSimilarity(firstWord, secondWord); + int maxQGramsMatching = tokenUtilities.FirstTokenCount + tokenUtilities.SecondTokenCount; + + return (maxQGramsMatching == 0) ? defaultMismatchScore : ((maxQGramsMatching - difference) / maxQGramsMatching); + } + return defaultMismatchScore; + } + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + public override string GetSimilarityExplained(string firstWord, string secondWord) { + throw new NotImplementedException(); + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double firstLength = firstWord.Length; + double secondLength = secondWord.Length; + return firstLength * secondLength * estimatedTimingConstant; + } + return 0.0; + } + + /// + /// gets the un-normalised similarity measure of the metric for the given strings. 
+ /// first word + /// second word + /// returns the score of the similarity measure (un-normalised) + public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) { + Collection firstTokens = tokeniser.Tokenize(firstWord); + Collection secondTokens = tokeniser.Tokenize(secondWord); + tokenUtilities.CreateMergedList(firstTokens, secondTokens); + return GetActualSimilarity(firstTokens, secondTokens); + } + + double GetActualSimilarity(Collection firstTokens, Collection secondTokens) { + Collection allTokens = tokenUtilities.CreateMergedSet(firstTokens, secondTokens); + + int difference = 0; + foreach (string token in allTokens) { + int matchingQGrams1 = 0; + for (int i = 0; i < firstTokens.Count; i++) { + if (firstTokens[i].Equals(token)) { + matchingQGrams1++; + } + } + + int matchingQGrams2 = 0; + for (int i = 0; i < secondTokens.Count; i++) { + if (secondTokens[i].Equals(token)) { + matchingQGrams2++; + } + } + if (matchingQGrams1 > matchingQGrams2) { + difference += matchingQGrams1 - matchingQGrams2; + } + else { + difference += matchingQGrams2 - matchingQGrams1; + } + } + return difference; + } + + /// + /// returns the long string identifier for the metric. + /// + public override string LongDescriptionString { + get { + return + "Implements the Q Grams Distance algorithm providing a similarity measure between two strings using the qGram approach check matching qGrams/possible matching qGrams"; + } + } + + /// + /// returns the string identifier for the metric. 
+ /// + public override string ShortDescriptionString { get { return "QGramsDistance"; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/SimilarityClasses/edit distance/Levenstein.cs b/src/SimMetrics/SimilarityClasses/edit distance/Levenstein.cs new file mode 100644 index 0000000..4539495 --- /dev/null +++ b/src/SimMetrics/SimilarityClasses/edit distance/Levenstein.cs @@ -0,0 +1,202 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is notclear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. 
Yorks,
+ * S1 4DP
+ * United Kingdom,
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#endregion
+
+namespace SimMetricsMetricUtilities {
+    using System;
+    using SimMetricsApi;
+    using SimMetricsUtilities;
+
+    /// <summary>
+    /// String metric built on the Levenstein edit distance: the cheapest sequence of
+    /// insertions, deletions and substitutions that turns the first word into the second.
+    /// </summary>
+    [Serializable]
+    public sealed class Levenstein : AbstractStringMetric {
+        /// <summary>Value the normalised distance is subtracted from; returned as-is for two empty words.</summary>
+        const double defaultPerfectMatchScore = 1.0;
+
+        /// <summary>Value returned when an argument is null; also used as the zero in the length check.</summary>
+        const double defaultMismatchScore = 0.0;
+
+        /// <summary>
+        /// Substitution cost function d(si,tj) consulted for every pair of characters.
+        /// </summary>
+        AbstractSubstitutionCost dCostFunction;
+
+        /// <summary>
+        /// Factor multiplied by the product of the two word lengths to estimate the run time.
+        /// </summary>
+        double estimatedTimingConstant = 0.00018F;
+
+        /// <summary>
+        /// Creates the metric with a <c>SubCostRange0To1</c> substitution cost function.
+        /// </summary>
+        public Levenstein() {
+            dCostFunction = new SubCostRange0To1();
+        }
+
+        /// <summary>
+        /// Normalises the edit distance by the length of the longer word and turns it into a similarity.
+        /// </summary>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>
+        /// 1 minus (distance / longer length); 1 when both words are empty; 0 when either word is null.
+        /// </returns>
+        public override double GetSimilarity(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return defaultMismatchScore;
+            }
+
+            double distance = GetUnnormalisedSimilarity(firstWord, secondWord);
+            double longest = Math.Max(firstWord.Length, secondWord.Length);
+            if (longest == defaultMismatchScore) {
+                // both words are empty, so there is nothing to divide by
+                return defaultPerfectMatchScore;
+            }
+            return defaultPerfectMatchScore - distance / longest;
+        }
+
+        /// <summary>
+        /// Not implemented for this metric.
+        /// </summary>
+        /// <param name="firstWord">string 1</param>
+        /// <param name="secondWord">string 2</param>
+        /// <returns>never returns</returns>
+        /// <exception cref="NotImplementedException">always thrown</exception>
+        public override string GetSimilarityExplained(string firstWord, string secondWord) {
+            throw new NotImplementedException();
+        }
+
+        /// <summary>
+        /// Estimates the cost of a comparison as (first length * second length * timing constant).
+        /// </summary>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>the estimate, or 0 when either word is null</returns>
+        public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return defaultMismatchScore;
+            }
+
+            double cellCount = (double)firstWord.Length * secondWord.Length;
+            return cellCount * estimatedTimingConstant;
+        }
+
+        /// <summary>
+        /// Computes the raw edit distance with the usual dynamic programme:
+        /// D(i,j) = min( D(i-1,j) + 1, D(i,j-1) + 1, D(i-1,j-1) + d(si,tj) ),
+        /// where d is the substitution cost function and a gap always costs 1.
+        /// </summary>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>
+        /// the distance; the length of the other word when one word is empty; 0 when either word is null
+        /// </returns>
+        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return 0.0;
+            }
+
+            int rowCount = firstWord.Length;
+            int columnCount = secondWord.Length;
+            if (rowCount == 0) {
+                return columnCount;
+            }
+            if (columnCount == 0) {
+                return rowCount;
+            }
+
+            // table[r][c] holds the distance between the first r and the first c characters
+            double[][] table = new double[rowCount + 1][];
+            for (int r = 0; r <= rowCount; r++) {
+                table[r] = new double[columnCount + 1];
+                table[r][0] = r;
+            }
+            for (int c = 0; c <= columnCount; c++) {
+                table[0][c] = c;
+            }
+
+            for (int r = 1; r <= rowCount; r++) {
+                for (int c = 1; c <= columnCount; c++) {
+                    double substitution = dCostFunction.GetCost(firstWord, r - 1, secondWord, c - 1);
+                    table[r][c] = MathFunctions.MinOf3(
+                        table[r - 1][c] + 1.0,
+                        table[r][c - 1] + 1.0,
+                        table[r - 1][c - 1] + substitution);
+                }
+            }
+
+            return table[rowCount][columnCount];
+        }
+
+        /// <summary>
+        /// Long description of the metric.
+        /// </summary>
+        public override string LongDescriptionString {
+            get { return "Implements the basic Levenstein algorithm providing a similarity measure between two strings"; }
+        }
+
+        /// <summary>
+        /// Short identifier of the metric.
+        /// </summary>
+        public override string ShortDescriptionString {
+            get { return "Levenstein"; }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/SimMetrics/SimilarityClasses/edit distance/NeedlemanWunch.cs b/src/SimMetrics/SimilarityClasses/edit distance/NeedlemanWunch.cs
new file mode 100644
index 0000000..55d5bd0
--- /dev/null
+++ b/src/SimMetrics/SimilarityClasses/edit distance/NeedlemanWunch.cs
@@ -0,0 +1,233 @@
+#region Copyright
+/*
+ * The original .NET implementation of the SimMetrics library is taken from the Java
+ * source and converted to NET using the Microsoft Java converter.
+ * It is notclear who made the initial convertion to .NET.
+ *
+ * This updated version has started with the 1.0 .NET release of SimMetrics and used
+ * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed
+ * to be made to the converted code.
+ *
+ * this version with updates Copyright (c) 2006 Chris Parkinson.
+ *
+ * For any queries on the .NET version please contact me through the
+ * sourceforge web address.
+ *
+ * SimMetrics - SimMetrics is a java library of Similarity or Distance
+ * Metrics, e.g. Levenshtein Distance, that provide float based similarity
+ * measures between string Data. All metrics return consistant measures
+ * rather than unbounded similarity scores.
+ *
+ * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1
+ *
+ * Please Feel free to contact me about this library, I would appreciate
+ * knowing quickly what you wish to use it for and any criticisms/comments
+ * upon the SimMetric library.
+ *
+ * email: s.chapman@dcs.shef.ac.uk
+ * www: http://www.dcs.shef.ac.uk/~sam/
+ * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html
+ *
+ * address: Sam Chapman,
+ * Department of Computer Science,
+ * University of Sheffield,
+ * Sheffield,
+ * S. Yorks,
+ * S1 4DP
+ * United Kingdom,
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#endregion
+
+namespace SimMetricsMetricUtilities {
+    using System;
+    using SimMetricsApi;
+    using SimMetricsUtilities;
+
+    /// <summary>
+    /// Needleman-Wunch edit distance: a global alignment in which every gap costs
+    /// <see cref="GapCost"/> and every substitution costs what <see cref="DCostFunction"/> reports.
+    /// </summary>
+    [Serializable]
+    sealed public class NeedlemanWunch : AbstractStringMetric {
+        /// <summary>Gap cost used by the constructors that do not take one.</summary>
+        const double defaultGapCost = 2.0;
+        /// <summary>Value returned when an argument is null; also used as the zero in range checks.</summary>
+        const double defaultMismatchScore = 0.0;
+        /// <summary>Value the normalised distance is subtracted from.</summary>
+        const double defaultPerfectMatchScore = 1.0;
+
+        /// <summary>
+        /// Creates the metric with the default gap cost and a <c>SubCostRange0To1</c> substitution cost.
+        /// </summary>
+        public NeedlemanWunch() : this(defaultGapCost, new SubCostRange0To1()) {}
+
+        /// <summary>
+        /// Creates the metric with the given gap cost and a <c>SubCostRange0To1</c> substitution cost.
+        /// </summary>
+        /// <param name="costG">the cost of a gap</param>
+        public NeedlemanWunch(double costG) : this(costG, new SubCostRange0To1()) {}
+
+        /// <summary>
+        /// Creates the metric with the given gap cost and substitution cost function.
+        /// </summary>
+        /// <param name="costG">the cost of a gap</param>
+        /// <param name="costFunction">the cost function to use</param>
+        public NeedlemanWunch(double costG, AbstractSubstitutionCost costFunction) {
+            gapCost = costG;
+            dCostFunction = costFunction;
+        }
+
+        /// <summary>
+        /// Creates the metric with the default gap cost and the given substitution cost function.
+        /// </summary>
+        /// <param name="costFunction">the cost function to use</param>
+        public NeedlemanWunch(AbstractSubstitutionCost costFunction) : this(defaultGapCost, costFunction) {}
+
+        /// <summary>
+        /// Substitution cost function d(si,tj) consulted for every pair of characters.
+        /// </summary>
+        AbstractSubstitutionCost dCostFunction;
+        /// <summary>
+        /// Factor multiplied by the product of the two word lengths to estimate the run time.
+        /// </summary>
+        double estimatedTimingConstant = 0.0001842F;
+        /// <summary>
+        /// Cost charged for every inserted or deleted character.
+        /// </summary>
+        double gapCost;
+
+        /// <summary>
+        /// Normalises the distance against the worst (and, when costs can be negative, the best)
+        /// possible score for words of this length and turns it into a similarity.
+        /// </summary>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>the normalised similarity; 0 when either word is null</returns>
+        public override double GetSimilarity(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return defaultMismatchScore;
+            }
+
+            double needlemanWunch = GetUnnormalisedSimilarity(firstWord, secondWord);
+
+            // the longer word bounds the number of edit operations
+            double maxValue = Math.Max(firstWord.Length, secondWord.Length);
+            double minValue = maxValue;
+            if (dCostFunction.MaxCost > gapCost) {
+                maxValue *= dCostFunction.MaxCost;
+            }
+            else {
+                maxValue *= gapCost;
+            }
+            if (dCostFunction.MinCost < gapCost) {
+                minValue *= dCostFunction.MinCost;
+            }
+            else {
+                minValue *= gapCost;
+            }
+
+            // a negative lower bound shifts both the range and the score so the ratio starts at zero
+            if (minValue < defaultMismatchScore) {
+                maxValue -= minValue;
+                needlemanWunch -= minValue;
+            }
+
+            if (maxValue == defaultMismatchScore) {
+                // nothing to divide by (both words empty, or every cost is zero)
+                return defaultPerfectMatchScore;
+            }
+            return defaultPerfectMatchScore - needlemanWunch / maxValue;
+        }
+
+        /// <summary>
+        /// Not implemented for this metric.
+        /// </summary>
+        /// <param name="firstWord">string 1</param>
+        /// <param name="secondWord">string 2</param>
+        /// <returns>never returns</returns>
+        /// <exception cref="NotImplementedException">always thrown</exception>
+        public override string GetSimilarityExplained(string firstWord, string secondWord) {
+            throw new NotImplementedException();
+        }
+
+        /// <summary>
+        /// Estimates the cost of a comparison as (first length * second length * timing constant).
+        /// </summary>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>the estimate, or 0 when either word is null</returns>
+        public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return defaultMismatchScore;
+            }
+
+            double firstLength = firstWord.Length;
+            double secondLength = secondWord.Length;
+            return firstLength * secondLength * estimatedTimingConstant;
+        }
+
+        /// <summary>
+        /// Computes the raw alignment distance:
+        /// D(i,j) = min( D(i-1,j) + gapCost, D(i,j-1) + gapCost, D(i-1,j-1) + d(si,tj) ).
+        /// </summary>
+        /// <remarks>
+        /// The borders D(i,0) and D(0,j) are i * gapCost and j * gapCost. Earlier revisions charged
+        /// 1 per border gap, which made a gap at the start of a word cheaper than the same gap
+        /// anywhere else (with gap cost 2, "bcd"/"abcd" scored 1 while "abc"/"abcd" scored 2).
+        /// </remarks>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>
+        /// the distance; (length of the other word * gapCost) when one word is empty;
+        /// 0 when either word is null
+        /// </returns>
+        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return 0.0;
+            }
+
+            int n = firstWord.Length;
+            int m = secondWord.Length;
+            // aligning against an empty word is one gap per character of the other word
+            if (n == 0) {
+                return m * gapCost;
+            }
+            if (m == 0) {
+                return n * gapCost;
+            }
+
+            double[][] d = new double[n + 1][];
+            for (int i = 0; i <= n; i++) {
+                d[i] = new double[m + 1];
+                d[i][0] = i * gapCost;
+            }
+            for (int j = 0; j <= m; j++) {
+                d[0][j] = j * gapCost;
+            }
+
+            for (int i = 1; i <= n; i++) {
+                for (int j = 1; j <= m; j++) {
+                    double cost = dCostFunction.GetCost(firstWord, i - 1, secondWord, j - 1);
+                    d[i][j] = MathFunctions.MinOf3(d[i - 1][j] + gapCost, d[i][j - 1] + gapCost, d[i - 1][j - 1] + cost);
+                }
+            }
+
+            return d[n][m];
+        }
+
+        /// <summary>
+        /// Gets or sets the d(i,j) substitution cost function.
+        /// </summary>
+        public AbstractSubstitutionCost DCostFunction { get { return dCostFunction; } set { dCostFunction = value; } }
+
+        /// <summary>
+        /// Gets or sets the cost of a gap.
+        /// </summary>
+        public double GapCost { get { return gapCost; } set { gapCost = value; } }
+
+        /// <summary>
+        /// Long description of the metric.
+        /// </summary>
+        public override string LongDescriptionString {
+            get {
+                return
+                    "Implements the Needleman-Wunch algorithm providing an edit distance based similarity measure between two strings";
+            }
+        }
+
+        /// <summary>
+        /// Short identifier of the metric.
+        /// </summary>
+        public override string ShortDescriptionString { get { return "NeedlemanWunch"; } }
+    }
+}
\ No newline at end of file
diff --git a/src/SimMetrics/SimilarityClasses/edit distance/SmithWaterman.cs b/src/SimMetrics/SimilarityClasses/edit distance/SmithWaterman.cs
new file mode 100644
index 0000000..1da1295
--- /dev/null
+++ b/src/SimMetrics/SimilarityClasses/edit distance/SmithWaterman.cs
@@ -0,0 +1,241 @@
+#region Copyright
+/*
+ * The original .NET implementation of the SimMetrics library is taken from the Java
+ * source and converted to NET using the Microsoft Java converter.
+ * It is notclear who made the initial convertion to .NET.
+ *
+ * This updated version has started with the 1.0 .NET release of SimMetrics and used
+ * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed
+ * to be made to the converted code.
+ *
+ * this version with updates Copyright (c) 2006 Chris Parkinson.
+ *
+ * For any queries on the .NET version please contact me through the
+ * sourceforge web address.
+ *
+ * SimMetrics - SimMetrics is a java library of Similarity or Distance
+ * Metrics, e.g. Levenshtein Distance, that provide float based similarity
+ * measures between string Data. All metrics return consistant measures
+ * rather than unbounded similarity scores.
+ *
+ * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1
+ *
+ * Please Feel free to contact me about this library, I would appreciate
+ * knowing quickly what you wish to use it for and any criticisms/comments
+ * upon the SimMetric library.
+ *
+ * email: s.chapman@dcs.shef.ac.uk
+ * www: http://www.dcs.shef.ac.uk/~sam/
+ * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html
+ *
+ * address: Sam Chapman,
+ * Department of Computer Science,
+ * University of Sheffield,
+ * Sheffield,
+ * S. Yorks,
+ * S1 4DP
+ * United Kingdom,
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#endregion
+
+namespace SimMetricsMetricUtilities {
+    using System;
+    using SimMetricsApi;
+    using SimMetricsUtilities;
+
+    /// <summary>
+    /// Smith-Waterman local alignment score with a fixed gap cost and a pluggable
+    /// substitution cost function. Higher scores mean more similar words.
+    /// </summary>
+    [Serializable]
+    sealed public class SmithWaterman : AbstractStringMetric {
+        /// <summary>Gap cost used by the constructors that do not take one.</summary>
+        const double defaultGapCost = 0.5;
+        /// <summary>Lowest score a cell can hold; also returned for null or empty input.</summary>
+        const double defaultMismatchScore = 0.0;
+        /// <summary>Similarity reported when there is nothing to normalise against.</summary>
+        const double defaultPerfectMatchScore = 1.0;
+        /// <summary>
+        /// Factor applied to the table size to estimate the run time.
+        /// </summary>
+        const double estimatedTimingConstant = 0.000161F;
+
+        /// <summary>
+        /// Creates the metric with the default gap cost and a <c>SubCostRange1ToMinus2</c> substitution cost.
+        /// </summary>
+        public SmithWaterman() : this(defaultGapCost, new SubCostRange1ToMinus2()) {}
+
+        /// <summary>
+        /// Creates the metric with the given gap cost and a <c>SubCostRange1ToMinus2</c> substitution cost.
+        /// </summary>
+        /// <param name="costG">the cost of a gap</param>
+        public SmithWaterman(double costG) : this(costG, new SubCostRange1ToMinus2()) {}
+
+        /// <summary>
+        /// Creates the metric with the given gap cost and substitution cost function.
+        /// </summary>
+        /// <param name="costG">the cost of a gap</param>
+        /// <param name="costFunction">the cost function to use</param>
+        public SmithWaterman(double costG, AbstractSubstitutionCost costFunction) {
+            gapCost = costG;
+            dCostFunction = costFunction;
+        }
+
+        /// <summary>
+        /// Creates the metric with the default gap cost and the given substitution cost function.
+        /// </summary>
+        /// <param name="costFunction">the cost function to use</param>
+        public SmithWaterman(AbstractSubstitutionCost costFunction) : this(defaultGapCost, costFunction) {}
+
+        /// <summary>
+        /// Substitution cost function d(si,tj) consulted for every pair of characters.
+        /// </summary>
+        AbstractSubstitutionCost dCostFunction;
+        /// <summary>
+        /// Amount subtracted from a neighbouring cell when a gap is opened or extended.
+        /// </summary>
+        double gapCost;
+
+        /// <summary>
+        /// Normalises the local alignment score by the best score the shorter word could reach.
+        /// </summary>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>
+        /// score / (shorter length * larger of MaxCost and -gapCost); 1 for two empty words;
+        /// 0 when exactly one word is empty or either word is null
+        /// </returns>
+        public override double GetSimilarity(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return defaultMismatchScore;
+            }
+            if (firstWord.Length == 0 || secondWord.Length == 0) {
+                // Two empty words are identical; an empty word shares nothing with a non-empty one.
+                // Without this check the zero normaliser below reported a perfect match for both cases.
+                return firstWord.Length == secondWord.Length ? defaultPerfectMatchScore : defaultMismatchScore;
+            }
+
+            double smithWaterman = GetUnnormalisedSimilarity(firstWord, secondWord);
+            double maxValue = Math.Min(firstWord.Length, secondWord.Length);
+            if (dCostFunction.MaxCost > -gapCost) {
+                maxValue *= dCostFunction.MaxCost;
+            }
+            else {
+                maxValue *= (-gapCost);
+            }
+
+            if (maxValue == defaultMismatchScore) {
+                // every cost is zero, so there is nothing to divide by
+                return defaultPerfectMatchScore;
+            }
+            return smithWaterman / maxValue;
+        }
+
+        /// <summary>
+        /// Not implemented for this metric.
+        /// </summary>
+        /// <param name="firstWord">string 1</param>
+        /// <param name="secondWord">string 2</param>
+        /// <returns>never returns</returns>
+        /// <exception cref="NotImplementedException">always thrown</exception>
+        public override string GetSimilarityExplained(string firstWord, string secondWord) {
+            throw new NotImplementedException();
+        }
+
+        /// <summary>
+        /// Estimates the cost of a comparison as
+        /// ((first length * second length + first length + second length) * timing constant).
+        /// </summary>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>the estimate, or 0 when either word is null</returns>
+        public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return 0.0;
+            }
+
+            double firstLength = firstWord.Length;
+            double secondLength = secondWord.Length;
+            return (firstLength * secondLength + firstLength + secondLength) * estimatedTimingConstant;
+        }
+
+        /// <summary>
+        /// Computes the raw local alignment score: the largest value in the table
+        /// D(i,j) = max( 0, D(i-1,j) - gapCost, D(i,j-1) - gapCost, D(i-1,j-1) + d(si,tj) ).
+        /// </summary>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>the score; 0 when either word is null or empty</returns>
+        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return 0.0;
+            }
+
+            int n = firstWord.Length;
+            int m = secondWord.Length;
+            if (n == 0 || m == 0) {
+                // No characters can be aligned. Earlier revisions returned the other word's length
+                // here, which is a distance-style value and read as a high similarity score.
+                return defaultMismatchScore;
+            }
+
+            double[][] d = new double[n][];
+            for (int i = 0; i < n; i++) {
+                d[i] = new double[m];
+            }
+
+            double maxSoFar = defaultMismatchScore;
+
+            // first column: only a gap from the cell above or a fresh start is possible
+            for (int i = 0; i < n; i++) {
+                double cost = dCostFunction.GetCost(firstWord, i, secondWord, 0);
+                double fromAbove = (i == 0) ? -gapCost : d[i - 1][0] - gapCost;
+                d[i][0] = MathFunctions.MaxOf3(defaultMismatchScore, fromAbove, cost);
+                if (d[i][0] > maxSoFar) {
+                    maxSoFar = d[i][0];
+                }
+            }
+
+            // first row: d[0][0] was filled by the column pass, so start at the second cell
+            for (int j = 1; j < m; j++) {
+                double cost = dCostFunction.GetCost(firstWord, 0, secondWord, j);
+                d[0][j] = MathFunctions.MaxOf3(defaultMismatchScore, d[0][j - 1] - gapCost, cost);
+                if (d[0][j] > maxSoFar) {
+                    maxSoFar = d[0][j];
+                }
+            }
+
+            for (int i = 1; i < n; i++) {
+                for (int j = 1; j < m; j++) {
+                    double cost = dCostFunction.GetCost(firstWord, i, secondWord, j);
+                    d[i][j] =
+                        MathFunctions.MaxOf4(defaultMismatchScore, d[i - 1][j] - gapCost, d[i][j - 1] - gapCost,
+                                             d[i - 1][j - 1] + cost);
+                    if (d[i][j] > maxSoFar) {
+                        maxSoFar = d[i][j];
+                    }
+                }
+            }
+
+            // the best local alignment may end anywhere, so the answer is the table maximum
+            return maxSoFar;
+        }
+
+        /// <summary>
+        /// Gets or sets the d(i,j) substitution cost function.
+        /// </summary>
+        public AbstractSubstitutionCost DCostFunction {
+            get { return dCostFunction; }
+            // write the backing field; assigning the property here would call this setter again without end
+            set { dCostFunction = value; }
+        }
+
+        /// <summary>
+        /// Gets or sets the cost of a gap.
+        /// </summary>
+        public double GapCost { get { return gapCost; } set { gapCost = value; } }
+
+        /// <summary>
+        /// Long description of the metric.
+        /// </summary>
+        public override string LongDescriptionString { get { return "Implements the Smith-Waterman algorithm providing a similarity measure between two string"; } }
+
+        /// <summary>
+        /// Short identifier of the metric.
+        /// </summary>
+        public override string ShortDescriptionString { get { return "SmithWaterman"; } }
+    }
+}
\ No newline at end of file
diff --git a/src/SimMetrics/SimilarityClasses/edit distance/SmithWatermanGotoh.cs b/src/SimMetrics/SimilarityClasses/edit distance/SmithWatermanGotoh.cs
new file mode 100644
index 0000000..a95cb8a
--- /dev/null
+++ b/src/SimMetrics/SimilarityClasses/edit distance/SmithWatermanGotoh.cs
@@ -0,0 +1,130 @@
+#region Copyright
+/*
+ * The original .NET implementation of the SimMetrics library is taken from the Java
+ * source and converted to NET using the Microsoft Java converter.
+ * It is notclear who made the initial convertion to .NET.
+ *
+ * This updated version has started with the 1.0 .NET release of SimMetrics and used
+ * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed
+ * to be made to the converted code.
+ *
+ * this version with updates Copyright (c) 2006 Chris Parkinson.
+ *
+ * For any queries on the .NET version please contact me through the
+ * sourceforge web address.
+ *
+ * SimMetrics - SimMetrics is a java library of Similarity or Distance
+ * Metrics, e.g. Levenshtein Distance, that provide float based similarity
+ * measures between string Data.
All metrics return consistant measures
+ * rather than unbounded similarity scores.
+ *
+ * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1
+ *
+ * Please Feel free to contact me about this library, I would appreciate
+ * knowing quickly what you wish to use it for and any criticisms/comments
+ * upon the SimMetric library.
+ *
+ * email: s.chapman@dcs.shef.ac.uk
+ * www: http://www.dcs.shef.ac.uk/~sam/
+ * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html
+ *
+ * address: Sam Chapman,
+ * Department of Computer Science,
+ * University of Sheffield,
+ * Sheffield,
+ * S. Yorks,
+ * S1 4DP
+ * United Kingdom,
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#endregion
+
+namespace SimMetricsMetricUtilities {
+    using System;
+    using SimMetricsApi;
+    using SimMetricsUtilities;
+
+    /// <summary>
+    /// Smith-Waterman-Gotoh metric: the windowed affine-gap implementation of the base class
+    /// run with the largest window an int can express.
+    /// </summary>
+    [Serializable]
+    public sealed class SmithWatermanGotoh : SmithWatermanGotohWindowedAffine {
+        /// <summary>
+        /// Window size handed to the base class by every constructor.
+        /// </summary>
+        const int affineGapWindowSize = int.MaxValue;
+
+        /// <summary>
+        /// Factor applied to the cubic size term when estimating the run time.
+        /// </summary>
+        const double estimatedTimingConstant = 2.2e-005F;
+
+        /// <summary>
+        /// Creates the metric with an <c>AffineGapRange5To0Multiplier1</c> gap cost and a
+        /// <c>SubCostRange5ToMinus3</c> substitution cost.
+        /// </summary>
+        public SmithWatermanGotoh()
+            : base(new AffineGapRange5To0Multiplier1(), new SubCostRange5ToMinus3(), affineGapWindowSize) {
+        }
+
+        /// <summary>
+        /// Creates the metric with the given gap cost and a <c>SubCostRange5ToMinus3</c> substitution cost.
+        /// </summary>
+        /// <param name="gapCostFunction">the gap cost function</param>
+        public SmithWatermanGotoh(AbstractAffineGapCost gapCostFunction)
+            : base(gapCostFunction, new SubCostRange5ToMinus3(), affineGapWindowSize) {
+        }
+
+        /// <summary>
+        /// Creates the metric with the given gap cost and substitution cost functions.
+        /// </summary>
+        /// <param name="gapCostFunction">the gap cost function</param>
+        /// <param name="costFunction">the cost function to use</param>
+        public SmithWatermanGotoh(AbstractAffineGapCost gapCostFunction, AbstractSubstitutionCost costFunction)
+            : base(gapCostFunction, costFunction, affineGapWindowSize) {
+        }
+
+        /// <summary>
+        /// Creates the metric with an <c>AffineGapRange5To0Multiplier1</c> gap cost and the given
+        /// substitution cost function.
+        /// </summary>
+        /// <param name="costFunction">the cost function to use</param>
+        public SmithWatermanGotoh(AbstractSubstitutionCost costFunction)
+            : base(new AffineGapRange5To0Multiplier1(), costFunction, affineGapWindowSize) {
+        }
+
+        /// <summary>
+        /// Estimates the cost of a comparison as
+        /// ((a * b * a + a * b * b) * timing constant), a and b being the word lengths.
+        /// </summary>
+        /// <param name="firstWord">first word</param>
+        /// <param name="secondWord">second word</param>
+        /// <returns>the estimate, or 0 when either word is null</returns>
+        public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
+            if (firstWord == null || secondWord == null) {
+                return 0.0;
+            }
+
+            double firstLength = firstWord.Length;
+            double secondLength = secondWord.Length;
+            double tableCells = firstLength * secondLength;
+            double scanWork = tableCells * firstLength + tableCells * secondLength;
+            return scanWork * estimatedTimingConstant;
+        }
+
+        /// <summary>
+        /// Long description of the metric.
+        /// </summary>
+        public override string LongDescriptionString {
+            get { return "Implements the Smith-Waterman-Gotoh algorithm providing a similarity measure between two string"; }
+        }
+
+        /// <summary>
+        /// Short identifier of the metric.
+        /// </summary>
+        public override string ShortDescriptionString {
+            get { return "SmithWatermanGotoh"; }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/SimMetrics/SimilarityClasses/edit distance/SmithWatermanGotohWindowedAffine.cs b/src/SimMetrics/SimilarityClasses/edit distance/SmithWatermanGotohWindowedAffine.cs
new file mode 100644
index 0000000..f461b51
--- /dev/null
+++ b/src/SimMetrics/SimilarityClasses/edit distance/SmithWatermanGotohWindowedAffine.cs
@@ -0,0 +1,333 @@
+#region Copyright
+/*
+ * The original .NET implementation of the SimMetrics library is taken from the Java
+ * source and converted to NET using the Microsoft Java converter.
+ * It is notclear who made the initial convertion to .NET.
+ *
+ * This updated version has started with the 1.0 .NET release of SimMetrics and used
+ * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed
+ * to be made to the converted code.
+ *
+ * this version with updates Copyright (c) 2006 Chris Parkinson.
+ *
+ * For any queries on the .NET version please contact me through the
+ * sourceforge web address.
+ *
+ * SimMetrics - SimMetrics is a java library of Similarity or Distance
+ * Metrics, e.g. Levenshtein Distance, that provide float based similarity
+ * measures between string Data. All metrics return consistant measures
+ * rather than unbounded similarity scores.
+ *
+ * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1
+ *
+ * Please Feel free to contact me about this library, I would appreciate
+ * knowing quickly what you wish to use it for and any criticisms/comments
+ * upon the SimMetric library.
+ *
+ * email: s.chapman@dcs.shef.ac.uk
+ * www: http://www.dcs.shef.ac.uk/~sam/
+ * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html
+ *
+ * address: Sam Chapman,
+ * Department of Computer Science,
+ * University of Sheffield,
+ * Sheffield,
+ * S.
Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsMetricUtilities { + using System; + using SimMetricsApi; + using SimMetricsUtilities; + + /// + /// implements the smith waterman with gotoh extension using a windowed affine gap. + /// + [Serializable] + public class SmithWatermanGotohWindowedAffine : AbstractStringMetric { + const double defaultMismatchScore = 0.0; + const double defaultPerfectScore = 1.0; + const int defaultWindowSize = 100; + + /// + /// constructor - default (empty). 
+ /// + public SmithWatermanGotohWindowedAffine() + : this(new AffineGapRange5To0Multiplier1(), new SubCostRange5ToMinus3(), defaultWindowSize) {} + + /// + /// constructor + /// + /// the size of the affine gap window to use + public SmithWatermanGotohWindowedAffine(int affineGapWindowSize) + : this(new AffineGapRange5To0Multiplier1(), new SubCostRange5ToMinus3(), affineGapWindowSize) {} + + /// + /// constructor + /// + /// the gap cost function + public SmithWatermanGotohWindowedAffine(AbstractAffineGapCost gapCostFunction) + : this(gapCostFunction, new SubCostRange5ToMinus3(), defaultWindowSize) {} + + /// + /// constructor + /// + /// the gap cost function + /// the size of the affine gap window to use + public SmithWatermanGotohWindowedAffine(AbstractAffineGapCost gapCostFunction, int affineGapWindowSize) + : this(gapCostFunction, new SubCostRange5ToMinus3(), affineGapWindowSize) {} + + /// + /// constructor + /// + /// the gap cost function + /// the cost function to use + public SmithWatermanGotohWindowedAffine(AbstractAffineGapCost gapCostFunction, AbstractSubstitutionCost costFunction) + : this(gapCostFunction, costFunction, defaultWindowSize) {} + + /// + /// constructor + /// + /// the gap cost function + /// the cost function to use + /// the size of the affine gap window to use + public SmithWatermanGotohWindowedAffine(AbstractAffineGapCost gapCostFunction, AbstractSubstitutionCost costFunction, + int affineGapWindowSize) { + gGapFunction = gapCostFunction; + dCostFunction = costFunction; + windowSize = affineGapWindowSize; + } + + /// + /// constructor + /// + /// the cost function to use + public SmithWatermanGotohWindowedAffine(AbstractSubstitutionCost costFunction) + : this(new AffineGapRange5To0Multiplier1(), costFunction, defaultWindowSize) {} + + /// + /// constructor + /// + /// the cost function to use + /// the size of the affine gap window to use + public SmithWatermanGotohWindowedAffine(AbstractSubstitutionCost costFunction, int 
affineGapWindowSize) + : this(new AffineGapRange5To0Multiplier1(), costFunction, affineGapWindowSize) {} + + /// + /// the private cost function used in the SmithWatermanGotoh distance. + /// + AbstractSubstitutionCost dCostFunction; + /// + /// a constant for calculating the estimated timing cost. + /// + double estimatedTimingConstant = 4.5e-005F; + /// + /// the private cost function for affine gaps. + /// + AbstractAffineGapCost gGapFunction; + /// + /// private field for the maximum affine gap window size. + /// + int windowSize; + + /// + /// gets the similarity of the two strings using Smith-Waterman-Gotoh distance. + /// + /// + /// + /// a value between 0-1 of the similarity + public override double GetSimilarity(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double smithWatermanGotoh = GetUnnormalisedSimilarity(firstWord, secondWord); + double maxValue = Math.Min(firstWord.Length, secondWord.Length); + if (dCostFunction.MaxCost > -gGapFunction.MaxCost) { + maxValue *= dCostFunction.MaxCost; + } + else { + maxValue *= (-gGapFunction.MaxCost); + } + if (maxValue == defaultMismatchScore) { + return defaultPerfectScore; + } + else { + return smithWatermanGotoh / maxValue; + } + } + return defaultMismatchScore; + } + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + public override string GetSimilarityExplained(string firstWord, string secondWord) { + throw new NotImplementedException(); + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. 
/// <summary>
/// gets the estimated time in milliseconds it takes to perform a similarity timing.
/// </summary>
/// <param name="firstWord">first word</param>
/// <param name="secondWord">second word</param>
/// <returns>the estimated time in milliseconds taken to perform the similarity measure,
/// or 0.0 when either word is null</returns>
public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
    if ((firstWord == null) || (secondWord == null)) {
        return 0.0;
    }

    double lengthOfFirst = firstWord.Length;
    double lengthOfSecond = secondWord.Length;
    // the estimate adds the same length * length * window product twice
    double windowedProduct = lengthOfFirst * lengthOfSecond * windowSize;
    return (windowedProduct + windowedProduct) * estimatedTimingConstant;
}

/// <summary>
/// gets the un-normalised similarity measure of the metric for the given strings.
/// </summary>
/// <remarks>
/// Fills a score table with one row per character of <paramref name="firstWord"/> and one
/// column per character of <paramref name="secondWord"/>. Every cell is the best of the
/// mismatch floor, a gap opened within the window along either axis, and (away from the
/// first row/column) the diagonal predecessor plus the substitution cost.
/// </remarks>
/// <param name="firstWord">first word</param>
/// <param name="secondWord">second word</param>
/// <returns>the largest score found in the table; the length of the other word when one word
/// is empty; 0.0 when either word is null</returns>
public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) {
    if ((firstWord == null) || (secondWord == null)) {
        return 0.0;
    }

    int rows = firstWord.Length;
    int cols = secondWord.Length;
    // zero length input short-circuits to the length of the other word
    if (rows == 0) {
        return cols;
    }
    if (cols == 0) {
        return rows;
    }

    double[][] scores = new double[rows][];
    for (int r = 0; r < rows; r++) {
        scores[r] = new double[cols];
    }

    double best = 0.0;

    // first column: there is no diagonal predecessor, only gaps along the first word
    for (int r = 0; r < rows; r++) {
        double substitution = dCostFunction.GetCost(firstWord, r, secondWord, 0);
        if (r == 0) {
            scores[0][0] = Math.Max(defaultMismatchScore, substitution);
        }
        else {
            double gapScore = defaultMismatchScore;
            for (int k = Math.Max(1, r - windowSize); k < r; k++) {
                gapScore = Math.Max(gapScore, scores[r - k][0] - gGapFunction.GetCost(firstWord, r - k, r));
            }

            scores[r][0] = MathFunctions.MaxOf3(defaultMismatchScore, gapScore, substitution);
        }
        if (scores[r][0] > best) {
            best = scores[r][0];
        }
    }

    // first row: there is no diagonal predecessor, only gaps along the second word
    for (int c = 0; c < cols; c++) {
        double substitution = dCostFunction.GetCost(firstWord, 0, secondWord, c);
        if (c == 0) {
            scores[0][0] = Math.Max(defaultMismatchScore, substitution);
        }
        else {
            double gapScore = defaultMismatchScore;
            for (int k = Math.Max(1, c - windowSize); k < c; k++) {
                gapScore = Math.Max(gapScore, scores[0][c - k] - gGapFunction.GetCost(secondWord, c - k, c));
            }

            scores[0][c] = MathFunctions.MaxOf3(defaultMismatchScore, gapScore, substitution);
        }
        if (scores[0][c] > best) {
            best = scores[0][c];
        }
    }

    // remaining cells: best of mismatch floor, either windowed gap, or diagonal + substitution
    for (int r = 1; r < rows; r++) {
        int rowWindowStart = Math.Max(1, r - windowSize);
        for (int c = 1; c < cols; c++) {
            double substitution = dCostFunction.GetCost(firstWord, r, secondWord, c);

            double gapInFirst = defaultMismatchScore;
            for (int k = rowWindowStart; k < r; k++) {
                gapInFirst = Math.Max(gapInFirst, scores[r - k][c] - gGapFunction.GetCost(firstWord, r - k, r));
            }

            double gapInSecond = defaultMismatchScore;
            for (int k = Math.Max(1, c - windowSize); k < c; k++) {
                gapInSecond = Math.Max(gapInSecond, scores[r][c - k] - gGapFunction.GetCost(secondWord, c - k, c));
            }

            scores[r][c] = MathFunctions.MaxOf4(defaultMismatchScore, gapInFirst, gapInSecond,
                                                scores[r - 1][c - 1] + substitution);
            if (scores[r][c] > best) {
                best = scores[r][c];
            }
        }
    }

    // the largest value in the table is the score
    return best;
}

/// <summary>
/// gets or sets the d(i,j) substitution cost function.
/// </summary>
public AbstractSubstitutionCost DCostFunction {
    get { return dCostFunction; }
    set { dCostFunction = value; }
}

/// <summary>
/// gets or sets the g gap cost function.
/// </summary>
public AbstractAffineGapCost GGapFunction {
    get { return gGapFunction; }
    set { gGapFunction = value; }
}
/// <summary>
/// returns the long string identifier for the metric.
/// </summary>
public override string LongDescriptionString {
    get {
        const string description =
            "Implements the Smith-Waterman-Gotoh algorithm with a windowed affine gap providing a similarity measure between two string";
        return description;
    }
}

/// <summary>
/// returns the string identifier for the metric.
/// </summary>
public override string ShortDescriptionString {
    get {
        return "SmithWatermanGotohWindowedAffine";
    }
}
[assembly: System.CLSCompliant(true)]

namespace SimMetricsMetricUtilities {
    using System;
    using System.Text;
    using SimMetricsApi;

    /// <summary>
    /// implements the Jaro string Metric.
    /// </summary>
    [Serializable]
    public sealed class Jaro : AbstractStringMetric {
        /// <summary>
        /// score returned when the strings cannot be compared or share nothing.
        /// </summary>
        const double defaultMismatchScore = 0.0;

        /// <summary>
        /// a constant for calculating the estimated timing cost.
        /// </summary>
        double estimatedTimingConstant = 4.12e-005F;

        /// <summary>
        /// gets the similarity of the two strings using Jaro distance.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>a value between 0-1 of the similarity; 0.0 when either word is null, when no
        /// characters are in common, or when the two common-character sequences differ in length</returns>
        public override double GetSimilarity(string firstWord, string secondWord) {
            if ((firstWord == null) || (secondWord == null)) {
                return defaultMismatchScore;
            }

            // search distance: half the shorter length (integer division) plus one
            int halflen = Math.Min(firstWord.Length, secondWord.Length) / 2 + 1;

            StringBuilder common1 = GetCommonCharacters(firstWord, secondWord, halflen);
            int commonMatches = common1.Length;
            if (commonMatches == 0) {
                return defaultMismatchScore;
            }

            StringBuilder common2 = GetCommonCharacters(secondWord, firstWord, halflen);
            // the two sequences must pair up one-to-one, otherwise treat as a mismatch
            if (commonMatches != common2.Length) {
                return defaultMismatchScore;
            }

            // count the positions at which the two common sequences disagree
            int transpositions = 0;
            for (int i = 0; i < commonMatches; i++) {
                if (common1[i] != common2[i]) {
                    transpositions++;
                }
            }
            // each transposition shows up as two disagreeing positions
            transpositions /= 2;

            return commonMatches / (3.0 * firstWord.Length) + commonMatches / (3.0 * secondWord.Length) +
                   (commonMatches - transpositions) / (3.0 * commonMatches);
        }

        /// <summary>gets a div class xhtml similarity explaining the operation of the metric.</summary>
        /// <param name="firstWord">string 1</param>
        /// <param name="secondWord">string 2</param>
        /// <returns>a div class html section detailing the metric operation.</returns>
        /// <exception cref="NotImplementedException">always; the explanation is not implemented.</exception>
        public override string GetSimilarityExplained(string firstWord, string secondWord) {
            throw new NotImplementedException();
        }

        /// <summary>
        /// gets the estimated time in milliseconds it takes to perform a similarity timing.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>the estimated time in milliseconds taken to perform the similarity measure,
        /// or 0.0 when either word is null</returns>
        public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
            if ((firstWord != null) && (secondWord != null)) {
                double firstLength = firstWord.Length;
                double secondLength = secondWord.Length;
                return firstLength * secondLength * estimatedTimingConstant;
            }
            return 0.0;
        }

        /// <summary>
        /// gets the un-normalised similarity measure of the metric for the given strings.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>returns the score of the similarity measure (un-normalised); identical to
        /// <see cref="GetSimilarity"/> for this metric</returns>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) {
            return GetSimilarity(firstWord, secondWord);
        }

        /// <summary>
        /// returns a string buffer of characters from string1 within string2 if they are of a given
        /// distance separation from the position in string1.
        /// </summary>
        /// <remarks>
        /// Each character of <paramref name="secondWord"/> can be matched at most once. Consumed
        /// positions are tracked in a separate flag array rather than by overwriting them with a
        /// sentinel character, so inputs that contain that character are matched correctly.
        /// The searched window for position i is [i - distanceSep, i + distanceSep), clipped to
        /// the bounds of <paramref name="secondWord"/>.
        /// </remarks>
        /// <param name="firstWord">string one</param>
        /// <param name="secondWord">string two</param>
        /// <param name="distanceSep">separation distance</param>
        /// <returns>the matched characters in the order they occur in string one, or null when
        /// either string is null</returns>
        static StringBuilder GetCommonCharacters(string firstWord, string secondWord, int distanceSep) {
            if ((firstWord == null) || (secondWord == null)) {
                return null;
            }

            StringBuilder returnCommons = new StringBuilder();
            bool[] consumed = new bool[secondWord.Length];
            for (int i = 0; i < firstWord.Length; i++) {
                char ch = firstWord[i];
                int windowEnd = Math.Min(i + distanceSep, secondWord.Length);
                for (int j = Math.Max(0, i - distanceSep); j < windowEnd; j++) {
                    if (!consumed[j] && secondWord[j] == ch) {
                        consumed[j] = true;
                        returnCommons.Append(ch);
                        break;
                    }
                }
            }

            return returnCommons;
        }

        /// <summary>
        /// returns the long string identifier for the metric.
        /// </summary>
        public override string LongDescriptionString {
            get {
                return
                    "Implements the Jaro algorithm providing a similarity measure between two strings allowing character transpositions to a degree";
            }
        }

        /// <summary>
        /// returns the string identifier for the metric.
        /// </summary>
        public override string ShortDescriptionString { get { return "Jaro"; } }
    }
}
namespace SimMetricsMetricUtilities {
    using System;
    using SimMetricsApi;
    using SimMetricsUtilities;

    /// <summary>
    /// implements the Jaro Winkler string metric
    /// </summary>
    [Serializable]
    public sealed class JaroWinkler : AbstractStringMetric {
        /// <summary>
        /// maximum prefix length to use.
        /// </summary>
        /// <remarks>changed from the original 6 to 4 to match the original definition of JaroWinkler
        /// Chris Parkinson .NET 2.0 implementation only</remarks>
        const int minPrefixTestLength = 4;

        /// <summary>
        /// prefix adjustment scale.
        /// </summary>
        const double prefixAdustmentScale = 0.1F;

        /// <summary>
        /// a constant for calculating the estimated timing cost.
        /// </summary>
        double estimatedTimingConstant = 4.342e-005F;

        /// <summary>
        /// private string metric allowing internal metric to be composed.
        /// </summary>
        AbstractStringMetric jaroStringMetric;

        /// <summary>
        /// constructor
        /// </summary>
        public JaroWinkler() {
            jaroStringMetric = new Jaro();
        }

        /// <summary>
        /// gets the similarity measure of the JaroWinkler metric for the given strings.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>0-1 similarity measure of the JaroWinkler metric; 0.0 when either word is null</returns>
        public override double GetSimilarity(string firstWord, string secondWord) {
            if ((firstWord == null) || (secondWord == null)) {
                return 0.0;
            }

            double jaroScore = jaroStringMetric.GetSimilarity(firstWord, secondWord);
            int sharedPrefix = GetPrefixLength(firstWord, secondWord);
            // boost the inner score in proportion to the shared prefix length
            return jaroScore + sharedPrefix * prefixAdustmentScale * (1.0 - jaroScore);
        }

        /// <summary>gets a div class xhtml similarity explaining the operation of the metric.</summary>
        /// <param name="firstWord">string 1</param>
        /// <param name="secondWord">string 2</param>
        /// <returns>a div class html section detailing the metric operation.</returns>
        /// <exception cref="NotImplementedException">always; the explanation is not implemented.</exception>
        public override string GetSimilarityExplained(string firstWord, string secondWord) {
            throw new NotImplementedException();
        }

        /// <summary>
        /// gets the estimated time in milliseconds it takes to perform a similarity timing.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>the estimated time in milliseconds taken to perform the similarity measure,
        /// or 0.0 when either word is null</returns>
        public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
            if ((firstWord == null) || (secondWord == null)) {
                return 0.0;
            }

            double lengthOfFirst = firstWord.Length;
            double lengthOfSecond = secondWord.Length;
            return lengthOfFirst * lengthOfSecond * estimatedTimingConstant;
        }

        /// <summary>
        /// gets the un-normalised similarity measure of the metric for the given strings.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>returns the score of the similarity measure (un-normalised); identical to
        /// <see cref="GetSimilarity"/> for this metric</returns>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) {
            return GetSimilarity(firstWord, secondWord);
        }

        /// <summary>
        /// gets the prefix length found of common characters at the beginning of the strings.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>the number of leading positions at which both strings agree, capped at the
        /// prefix test length and at the length of the shorter string; the prefix test length
        /// itself when either string is null</returns>
        static int GetPrefixLength(string firstWord, string secondWord) {
            if ((firstWord == null) || (secondWord == null)) {
                return minPrefixTestLength;
            }

            int limit = MathFunctions.MinOf3(minPrefixTestLength, firstWord.Length, secondWord.Length);
            int matched = 0;
            while ((matched < limit) && (firstWord[matched] == secondWord[matched])) {
                matched++;
            }
            return matched;
        }

        /// <summary>
        /// returns the long string identifier for the metric.
        /// </summary>
        public override string LongDescriptionString {
            get {
                return
                    "Implements the Jaro-Winkler algorithm providing a similarity measure between two strings allowing character transpositions to a degree adjusting the weighting for common prefixes";
            }
        }

        /// <summary>
        /// returns the string identifier for the metric.
        /// </summary>
        public override string ShortDescriptionString {
            get {
                return "JaroWinkler";
            }
        }
    }
}
namespace SimMetricsMetricUtilities {
    using System;
    using SimMetricsApi;

    /// <summary>
    /// implements a metric determined by the difference in string lengths
    /// </summary>
    [Serializable]
    public sealed class ChapmanLengthDeviation : AbstractStringMetric {
        /// <summary>
        /// gets the similarity of the two strings using ChapmanLengthDeviation
        /// </summary>
        /// <remarks>this is simply a ratio of difference in string lengths between those compared.
        /// Strings of equal length, including two empty strings, score 1.0; the empty/empty case
        /// is handled explicitly because the plain ratio would be 0/0 (NaN).</remarks>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>a value between 0-1 of the similarity; 0.0 when either word is null</returns>
        public override double GetSimilarity(string firstWord, string secondWord) {
            if ((firstWord == null) || (secondWord == null)) {
                return 0.0;
            }

            int firstLength = firstWord.Length;
            int secondLength = secondWord.Length;
            if (firstLength == secondLength) {
                // equal lengths always score 1.0; this also avoids 0/0 for two empty strings
                return 1.0;
            }

            // shorter length over longer length
            double shorter = Math.Min(firstLength, secondLength);
            double longer = Math.Max(firstLength, secondLength);
            return shorter / longer;
        }

        /// <summary>gets a div class xhtml similarity explaining the operation of the metric.</summary>
        /// <param name="firstWord">string 1</param>
        /// <param name="secondWord">string 2</param>
        /// <returns>a div class html section detailing the metric operation.</returns>
        /// <exception cref="NotImplementedException">always; the explanation is not implemented.</exception>
        public override string GetSimilarityExplained(string firstWord, string secondWord) {
            throw new NotImplementedException();
        }

        /// <summary>
        /// gets the estimated time in milliseconds it takes to perform a similarity timing.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>always 0.0 for this metric</returns>
        public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
            return 0.0;
        }

        /// <summary>
        /// gets the un-normalised similarity measure of the metric for the given strings.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>returns the score of the similarity measure (un-normalised); identical to
        /// <see cref="GetSimilarity"/> for this metric</returns>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) {
            return GetSimilarity(firstWord, secondWord);
        }

        /// <summary>
        /// returns the long string identifier for the metric.
        /// </summary>
        public override string LongDescriptionString {
            get {
                return
                    "Implements the Chapman Length Deviation algorithm whereby the length deviation of the word strings is used to determine if the strings are similar in size - This apporach is not intended to be used single handedly but rather alongside other approaches";
            }
        }

        /// <summary>
        /// returns the string identifier for the metric.
        /// </summary>
        public override string ShortDescriptionString { get { return "ChapmanLengthDeviation"; } }
    }
}
namespace SimMetricsMetricUtilities {
    using System;
    using SimMetricsApi;

    /// <summary>
    /// implements Chapman Mean Length metric
    /// </summary>
    [Serializable]
    public sealed class ChapmanMeanLength : AbstractStringMetric {
        /// <summary>
        /// defines the internal max string length beyond which 1.0 is always returned.
        /// </summary>
        const int chapmanMeanLengthMaxString = 500;

        /// <summary>
        /// score returned when either input is null.
        /// </summary>
        const double defaultMismatchScore = 0.0;

        /// <summary>
        /// score returned once the combined length exceeds the maximum.
        /// </summary>
        const double defaultPerfectScore = 1.0;

        /// <summary>
        /// gets the similarity of the two strings using ChapmanMeanLength
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>a value between 0-1 of the similarity, derived only from the combined length
        /// of the two words; 0.0 when either word is null</returns>
        public override double GetSimilarity(string firstWord, string secondWord) {
            if ((firstWord == null) || (secondWord == null)) {
                return defaultMismatchScore;
            }

            double combinedLength = secondWord.Length + firstWord.Length;
            if (combinedLength > chapmanMeanLengthMaxString) {
                return defaultPerfectScore;
            }

            // fraction of the maximum that is still unused, raised to the fourth power
            double remaining = (chapmanMeanLengthMaxString - combinedLength) / chapmanMeanLengthMaxString;
            double remainingToTheFourth = remaining * remaining * remaining * remaining;
            return defaultPerfectScore - remainingToTheFourth;
        }

        /// <summary>gets a div class xhtml similarity explaining the operation of the metric.</summary>
        /// <param name="firstWord">string 1</param>
        /// <param name="secondWord">string 2</param>
        /// <returns>a div class html section detailing the metric operation.</returns>
        /// <exception cref="NotImplementedException">always; the explanation is not implemented.</exception>
        public override string GetSimilarityExplained(string firstWord, string secondWord) {
            throw new NotImplementedException();
        }

        /// <summary>
        /// gets the estimated time in milliseconds it takes to perform a similarity timing.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>always 0.0 for this metric</returns>
        public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
            return 0.0;
        }

        /// <summary>
        /// gets the un-normalised similarity measure of the metric for the given strings.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>returns the score of the similarity measure (un-normalised); identical to
        /// <see cref="GetSimilarity"/> for this metric</returns>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) {
            return GetSimilarity(firstWord, secondWord);
        }

        /// <summary>
        /// returns the long string identifier for the metric.
        /// </summary>
        public override string LongDescriptionString {
            get {
                return
                    "Implements the Chapman Mean Length algorithm provides a similarity measure between two strings from size of the mean length of the vectors - this approach is suppossed to be used to determine which metrics may be best to apply rather than giveing a valid response itself";
            }
        }

        /// <summary>
        /// returns the string identifier for the metric.
        /// </summary>
        public override string ShortDescriptionString {
            get {
                return "ChapmanMeanLength";
            }
        }
    }
}
+ * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
namespace SimMetricsMetricUtilities {
    using System;
    using System.Collections.ObjectModel;
    using SimMetricsApi;
    using SimMetricsUtilities;

    /// <summary>
    /// a block distance implementation metric
    /// </summary>
    [Serializable]
    public sealed class BlockDistance : AbstractStringMetric {
        /// <summary>
        /// constructor - default (empty).
        /// </summary>
        public BlockDistance() : this(new TokeniserWhitespace()) {}

        /// <summary>
        /// constructor
        /// </summary>
        /// <param name="tokeniserToUse">the tokeniser to use should a different tokeniser be required</param>
        public BlockDistance(ITokeniser tokeniserToUse) {
            tokeniser = tokeniserToUse;
            tokenUtilities = new TokeniserUtilities<string>();
        }

        /// <summary>
        /// a constant for calculating the estimated timing cost.
        /// </summary>
        double estimatedTimingConstant = 6.445714e-005F;

        /// <summary>
        /// helper used to merge the token lists of the two inputs.
        /// </summary>
        TokeniserUtilities<string> tokenUtilities;

        /// <summary>
        /// tokeniser for tokenisation of the query strings.
        /// </summary>
        ITokeniser tokeniser;

        /// <summary>
        /// gets the similarity of the two strings using BlockDistance.
        /// </summary>
        /// <remarks>
        /// Null input scores 0.0, matching the other metrics in this library. When neither input
        /// produces any token the result is 1.0 (nothing differs); this case is handled
        /// explicitly because the normalisation would otherwise divide by zero and yield NaN.
        /// </remarks>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>a 0-1 similarity score</returns>
        public override double GetSimilarity(string firstWord, string secondWord) {
            if ((firstWord == null) || (secondWord == null)) {
                return 0.0;
            }

            Collection<string> firstTokens = tokeniser.Tokenize(firstWord);
            Collection<string> secondTokens = tokeniser.Tokenize(secondWord);
            int totalPossible = firstTokens.Count + secondTokens.Count;
            if (totalPossible == 0) {
                // no tokens on either side: avoid 0/0
                return 1.0;
            }

            double totalDistance = GetActualSimilarity(firstTokens, secondTokens);
            return (totalPossible - totalDistance) / totalPossible;
        }

        /// <summary>
        /// gets a div class xhtml similarity explaining the operation of the metric.
        /// </summary>
        /// <param name="firstWord">string 1</param>
        /// <param name="secondWord">string 2</param>
        /// <returns>a div class html section detailing the metric operation.</returns>
        /// <exception cref="NotImplementedException">always; the explanation is not implemented.</exception>
        public override string GetSimilarityExplained(string firstWord, string secondWord) {
            throw new NotImplementedException();
        }

        /// <summary>
        /// gets the estimated time in milliseconds it takes to perform a similarity timing.
        /// </summary>
        /// <param name="firstWord">string 1</param>
        /// <param name="secondWord">string 2</param>
        /// <returns>the estimated time in milliseconds taken to perform the similarity measure</returns>
        public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) {
            double firstTokens = tokeniser.Tokenize(firstWord).Count;
            double secondTokens = tokeniser.Tokenize(secondWord).Count;
            return
                ((firstTokens + secondTokens) * firstTokens + (firstTokens + secondTokens) * secondTokens) *
                estimatedTimingConstant;
        }

        /// <summary>
        /// gets the un-normalised similarity measure of the metric for the given strings.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>returns the score of the similarity measure (un-normalised)</returns>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) {
            Collection<string> firstTokens = tokeniser.Tokenize(firstWord);
            Collection<string> secondTokens = tokeniser.Tokenize(secondWord);
            return GetActualSimilarity(firstTokens, secondTokens);
        }

        /// <summary>
        /// Counts the entries of the merged token list that occur in exactly one of the inputs.
        /// </summary>
        /// <remarks>
        /// NOTE(review): membership is tested with Contains, so each merged entry contributes 0
        /// or 1 regardless of how often the token occurs on either side. Whether the merged list
        /// keeps duplicates depends on TokeniserUtilities.CreateMergedList, which is not visible
        /// here; confirm against that helper before relying on frequency-sensitive results.
        /// </remarks>
        /// <param name="firstTokens">tokens of the first word</param>
        /// <param name="secondTokens">tokens of the second word</param>
        /// <returns>the accumulated distance</returns>
        double GetActualSimilarity(Collection<string> firstTokens, Collection<string> secondTokens) {
            Collection<string> allTokens = tokenUtilities.CreateMergedList(firstTokens, secondTokens);

            int totalDistance = 0;
            foreach (string token in allTokens) {
                // a token present on only one side adds one unit of distance
                if (firstTokens.Contains(token) != secondTokens.Contains(token)) {
                    totalDistance++;
                }
            }
            return totalDistance;
        }

        /// <summary>
        /// returns the long string identifier for the metric.
        /// </summary>
        public override string LongDescriptionString {
            get {
                return
                    "Implements the Block distance algorithm whereby vector space block distance is used to determine a similarity";
            }
        }

        /// <summary>
        /// returns the string identifier for the metric.
        /// </summary>
        public override string ShortDescriptionString { get { return "BlockDistance"; } }
    }
}
+ * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsMetricUtilities { + using System; + using SimMetricsApi; + using SimMetricsUtilities; + + /// + /// + /// + [Serializable] + sealed public class CosineSimilarity : AbstractStringMetric { + /// + /// constructor + /// + public CosineSimilarity() : this(new TokeniserWhitespace()) {} + + /// + /// constructor + /// + /// the tokeniser to use should a different tokeniser be required + public CosineSimilarity(ITokeniser tokeniserToUse) { + tokeniser = tokeniserToUse; + tokenUtilities = new TokeniserUtilities(); + } + + /// + /// a constant for calculating the estimated timing cost. + /// + double estimatedTimingConstant = 3.833714e-007F; + + TokeniserUtilities tokenUtilities; + + /// + /// private tokeniser for tokenisation of the query strings. + /// + ITokeniser tokeniser; + + /// + /// gets the similarity of the two strings using CosineSimilarity. 
+ /// + /// + /// + /// a value between 0-1 of the similarity + public override double GetSimilarity(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + if (tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord)).Count > 0) { + return + tokenUtilities.CommonSetTerms() / + (Math.Pow(tokenUtilities.FirstSetTokenCount, 0.5) * Math.Pow(tokenUtilities.SecondSetTokenCount, 0.5)); + } + } + return 0.0; + } + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + public override string GetSimilarityExplained(string firstWord, string secondWord) { + throw new NotImplementedException(); + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double firstLength = firstWord.Length; + double secondLength = secondWord.Length; + return (firstLength + secondLength) * ((firstLength + secondLength) * estimatedTimingConstant); + } + return 0.0; + } + + /// + /// gets the un-normalised similarity measure of the metric for the given strings. + /// first word + /// second word + /// returns the score of the similarity measure (un-normalised) + public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) { + return GetSimilarity(firstWord, secondWord); + } + + /// + /// returns the long string identifier for the metric. 
+ /// + public override string LongDescriptionString { + get { + return + "Implements the Cosine Similarity algorithm providing a similarity measure between two strings from the angular divergence within term based vector space"; + } + } + + /// + /// returns the string identifier for the metric. + /// + public override string ShortDescriptionString { get { return "CosineSimilarity"; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/SimilarityClasses/token based/DiceSimilarity.cs b/src/SimMetrics/SimilarityClasses/token based/DiceSimilarity.cs new file mode 100644 index 0000000..d3dfc1d --- /dev/null +++ b/src/SimMetrics/SimilarityClasses/token based/DiceSimilarity.cs @@ -0,0 +1,156 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. 
+ * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsMetricUtilities { + using System; + using SimMetricsApi; + using SimMetricsUtilities; + + /// + /// + /// + [Serializable] + sealed public class DiceSimilarity : AbstractStringMetric { + /// + /// constructor + /// + public DiceSimilarity() : this(new TokeniserWhitespace()) {} + + /// + /// constructor + /// + /// the tokeniser to use should a different tokeniser be required + public DiceSimilarity(ITokeniser tokeniserToUse) { + tokeniser = tokeniserToUse; + tokenUtilities = new TokeniserUtilities(); + } + + /// + /// a constant for calculating the estimated timing cost. + /// + double estimatedTimingConstant = 3.445714e-007F; + + TokeniserUtilities tokenUtilities; + + /// + /// private tokeniser for tokenisation of the query strings. 
+ /// + ITokeniser tokeniser; + + /// + /// gets the similarity of the two strings using DiceSimilarity + /// + /// + /// + /// a value between 0-1 of the similarity + /// Dices coefficient = (2*Common Terms) / (Number of terms in String1 + Number of terms in String2). + public override double GetSimilarity(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + if (tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord)).Count > 0) { + return + (2.0 * tokenUtilities.CommonSetTerms()) / + (tokenUtilities.FirstSetTokenCount + tokenUtilities.SecondSetTokenCount); + } + } + return 0.0; + } + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + public override string GetSimilarityExplained(string firstWord, string secondWord) { + throw new NotImplementedException(); + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double firstLength = firstWord.Length; + double secondLength = secondWord.Length; + return (firstLength + secondLength) * ((firstLength + secondLength) * estimatedTimingConstant); + } + return 0.0; + } + + /// + /// gets the un-normalised similarity measure of the metric for the given strings. + /// + /// + /// returns the score of the similarity measure (un-normalised) + public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) { + return GetSimilarity(firstWord, secondWord); + } + + /// + /// returns the long string identifier for the metric. 
+ /// + public override string LongDescriptionString { + get { + return + "Implements the DiceSimilarity algorithm providing a similarity measure between two strings using the vector space of present terms"; + } + } + + /// + /// returns the string identifier for the metric. + /// + public override string ShortDescriptionString { get { return "DiceSimilarity"; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/SimilarityClasses/token based/EuclideanDistance.cs b/src/SimMetrics/SimilarityClasses/token based/EuclideanDistance.cs new file mode 100644 index 0000000..f2c40e2 --- /dev/null +++ b/src/SimMetrics/SimilarityClasses/token based/EuclideanDistance.cs @@ -0,0 +1,194 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. 
+ * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsMetricUtilities { + using System; + using System.Collections.ObjectModel; + using SimMetricsApi; + using SimMetricsUtilities; + + /// + /// + /// + [Serializable] + sealed public class EuclideanDistance : AbstractStringMetric { + const double defaultMismatchScore = 0.0; + + /// + /// constructor + /// + public EuclideanDistance() : this(new TokeniserWhitespace()) {} + + /// + /// constructor + /// + /// the tokeniser to use should a different tokeniser be required + public EuclideanDistance(ITokeniser tokeniserToUse) { + tokeniser = tokeniserToUse; + tokenUtilities = new TokeniserUtilities(); + } + + /// + /// a constant for calculating the estimated timing cost. + /// + double estimatedTimingConstant = 7.445714e-005F; + TokeniserUtilities tokenUtilities; + /// + /// private tokeniser for tokenisation of the query strings. 
+ /// + ITokeniser tokeniser; + + /// + /// gets the actual euclidean distance ie not the value between 0-1. + /// + /// + /// + /// the actual euclidean distance + public double GetEuclidDistance(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + Collection firstTokens = tokeniser.Tokenize(firstWord); + Collection secondTokens = tokeniser.Tokenize(secondWord); + + return GetActualDistance(firstTokens, secondTokens); + } + return defaultMismatchScore; + } + + /// + /// gets the similarity of the two strings using EuclideanDistance + /// + /// + /// + /// a value between 0-1 of the similarity 1.0 identical + /// the 0-1 return is calcualted from the maximum possible Euclidean + /// distance between the strings from the number of terms within them. + public override double GetSimilarity(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double difference = GetUnnormalisedSimilarity(firstWord, secondWord); + double totalPossible = Math.Sqrt(tokenUtilities.FirstTokenCount + tokenUtilities.SecondTokenCount); + return (totalPossible - difference) / totalPossible; + } + return defaultMismatchScore; + } + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + public override string GetSimilarityExplained(string firstWord, string secondWord) { + throw new NotImplementedException(); + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. 
+ /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double firstTokens = tokeniser.Tokenize(firstWord).Count; + double secondTokens = tokeniser.Tokenize(secondWord).Count; + return + ((firstTokens + secondTokens) * firstTokens + (firstTokens + secondTokens) * secondTokens) * + estimatedTimingConstant; + } + return 0.0; + } + + /// + /// gets the un-normalised similarity measure of the metric for the given strings. + /// + /// + /// returns the score of the similarity measure (un-normalised) + public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) { + return GetEuclidDistance(firstWord, secondWord); + } + + double GetActualDistance(Collection firstTokens, Collection secondTokens) { + Collection allTokens = tokenUtilities.CreateMergedList(firstTokens, secondTokens); + + int totalDistance = 0; + + foreach (string token in allTokens) { + int countInfirstWord = 0; + int countInsecondWord = 0; + if (firstTokens.Contains(token)) { + countInfirstWord++; + } + if (secondTokens.Contains(token)) { + countInsecondWord++; + } + + totalDistance += (countInfirstWord - countInsecondWord) * (countInfirstWord - countInsecondWord); + } + return Math.Sqrt(totalDistance); + } + + /// + /// returns the long string identifier for the metric. + /// + public override string LongDescriptionString { + get { + return + "Implements the Euclidean Distancey algorithm providing a similarity measure between two stringsusing the vector space of combined terms as the dimensions"; + } + } + + /// + /// returns the string identifier for the metric. 
+ /// + public override string ShortDescriptionString { get { return "EuclideanDistance"; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/SimilarityClasses/token based/JaccardSimilarity.cs b/src/SimMetrics/SimilarityClasses/token based/JaccardSimilarity.cs new file mode 100644 index 0000000..800935f --- /dev/null +++ b/src/SimMetrics/SimilarityClasses/token based/JaccardSimilarity.cs @@ -0,0 +1,157 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. 
Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsMetricUtilities { + using System; + using System.Collections.ObjectModel; + using SimMetricsApi; + using SimMetricsUtilities; + + /// + /// + /// + [Serializable] + sealed public class JaccardSimilarity : AbstractStringMetric { + const double defaultMismatchScore = 0.0; + + /// + /// + /// + public JaccardSimilarity() : this(new TokeniserWhitespace()) {} + + /// + /// the tokeniser to use should a different tokeniser be required + /// + /// + public JaccardSimilarity(ITokeniser tokeniserToUse) { + tokeniser = tokeniserToUse; + tokenUtilities = new TokeniserUtilities(); + } + + /// + /// a constant for calculating the estimated timing cost. + /// + double estimatedTimingConstant = 0.00014F; + + TokeniserUtilities tokenUtilities; + + /// + /// private tokeniser for tokenisation of the query strings. + /// + ITokeniser tokeniser; + + /// + /// gets the similarity of the two strings using JaccardSimilarity. + /// + /// + /// + /// a value between 0-1 of the similarity + /// Each instance is represented as a Jaccard vector similarity function. 
The Jaccard between two vectors X and Y is + /// (X*Y) / (|X||Y|-(X*Y)) + /// where (X*Y) is the inner product of X and Y, and |X| = (X*X)^1/2, i.e. the Euclidean norm of X. + /// This can more easily be described as ( |X and Y| ) / ( | X or Y | ) + public override double GetSimilarity(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + Collection allTokens = + tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord)); + if (allTokens.Count > 0) { + return (double)tokenUtilities.CommonSetTerms() / (double)allTokens.Count; + } + } + return defaultMismatchScore; + } + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + public override string GetSimilarityExplained(string firstWord, string secondWord) { + throw new NotImplementedException(); + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double firstTokens = tokeniser.Tokenize(firstWord).Count; + double secondTokens = tokeniser.Tokenize(secondWord).Count; + return firstTokens * secondTokens * estimatedTimingConstant; + } + return defaultMismatchScore; + } + + /// + /// gets the un-normalised similarity measure of the metric for the given strings. + /// + /// + /// returns the score of the similarity measure (un-normalised) + public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) { + return GetSimilarity(firstWord, secondWord); + } + + /// + /// returns the long string identifier for the metric. 
+ /// + public override string LongDescriptionString { get { return "Implements the Jaccard Similarity algorithm providing a similarity measure between two strings"; } } + + /// + /// returns the string identifier for the metric . + /// + public override string ShortDescriptionString { get { return "JaccardSimilarity"; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/SimilarityClasses/token based/MatchingCoefficient.cs b/src/SimMetrics/SimilarityClasses/token based/MatchingCoefficient.cs new file mode 100644 index 0000000..bd39c63 --- /dev/null +++ b/src/SimMetrics/SimilarityClasses/token based/MatchingCoefficient.cs @@ -0,0 +1,163 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. 
+ * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsMetricUtilities { + using System; + using System.Collections.ObjectModel; + using SimMetricsApi; + using SimMetricsUtilities; + + /// + /// + /// + [Serializable] + sealed public class MatchingCoefficient : AbstractStringMetric { + const double defaultMismatchScore = 0.0; + + /// + /// + /// + public MatchingCoefficient() : this(new TokeniserWhitespace()) {} + + /// + /// the tokeniser to use should a different tokeniser be required + /// + /// + public MatchingCoefficient(ITokeniser tokeniserToUse) { + tokeniser = tokeniserToUse; + tokenUtilities = new TokeniserUtilities(); + } + + /// + /// a constant for calculating the estimated timing cost. + /// + double estimatedTimingConstant = 0.0002F; + TokeniserUtilities tokenUtilities; + /// + /// private tokeniser for tokenisation of the query strings. + /// + ITokeniser tokeniser; + + /// + /// gets the similarity of the two strings using MatchingCoefficient. 
+ /// + /// + /// + /// a value between 0-1 of the similarity + public override double GetSimilarity(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double totalFound = GetUnnormalisedSimilarity(firstWord, secondWord); + int totalPossible = Math.Max(tokenUtilities.FirstTokenCount, tokenUtilities.SecondTokenCount); + return totalFound / totalPossible; + } + return defaultMismatchScore; + } + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + public override string GetSimilarityExplained(string firstWord, string secondWord) { + throw new NotImplementedException(); + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double firstTokens = tokeniser.Tokenize(firstWord).Count; + double secondTokens = tokeniser.Tokenize(secondWord).Count; + return secondTokens * firstTokens * estimatedTimingConstant; + } + return 0.0; + } + + /// + /// gets the un-normalised similarity measure of the metric for the given strings. 
+ /// + /// + /// returns the score of the similarity measure (un-normalised) + public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) { + Collection firstTokens = tokeniser.Tokenize(firstWord); + Collection secondTokens = tokeniser.Tokenize(secondWord); + return GetActualSimilarity(firstTokens, secondTokens); + } + + double GetActualSimilarity(Collection firstTokens, Collection secondTokens) { + Collection allTokens = tokenUtilities.CreateMergedList(firstTokens, secondTokens); + int totalFound = 0; + + foreach (string token in firstTokens) { + if (secondTokens.Contains(token)) { + totalFound++; + } + } + return totalFound; + } + + /// + /// returns the long string identifier for the metric. + /// + public override string LongDescriptionString { get { return "Implements the Matching Coefficient algorithm providing a similarity measure between two strings"; } } + + /// + /// returns the string identifier for the metric . + /// + public override string ShortDescriptionString { get { return "MatchingCoefficient"; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/SimilarityClasses/token based/MongeElkan.cs b/src/SimMetrics/SimilarityClasses/token based/MongeElkan.cs new file mode 100644 index 0000000..5344174 --- /dev/null +++ b/src/SimMetrics/SimilarityClasses/token based/MongeElkan.cs @@ -0,0 +1,190 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. 
+ * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsMetricUtilities { + using System; + using System.Collections.ObjectModel; + using SimMetricsApi; + using SimMetricsUtilities; + + /// + /// + /// + [Serializable] + public class MongeElkan : AbstractStringMetric { + const double defaultMismatchScore = 0.0; + + /// + /// basic constructor + /// + public MongeElkan() : this(new TokeniserWhitespace()) {} + + /// + /// constructor taking metric to use + /// + /// the string metric to use + public MongeElkan(AbstractStringMetric metricToUse) { + tokeniser = new TokeniserWhitespace(); + internalStringMetric = metricToUse; + } + + /// + /// constructor taking a tokeniser to use + /// + /// the tokeniser to use should a different tokeniser be required + public MongeElkan(ITokeniser tokeniserToUse) { + tokeniser = tokeniserToUse; + internalStringMetric = new SmithWatermanGotoh(); + } + + /// + /// constructor taking a tokeniser and string metric to use + /// + /// the tokeniser to use should a different tokeniser be required + /// the string metric to use + public MongeElkan(ITokeniser tokeniserToUse, AbstractStringMetric metricToUse) { + tokeniser = tokeniserToUse; + internalStringMetric = metricToUse; + } + + /// + /// private tokeniser for tokenisation of the query strings. + /// + internal ITokeniser tokeniser; + + /// + /// a constant for calculating the estimated timing cost. + /// + double estimatedTimingConstant = 0.0344F; + + /// + /// private string metric allowing internal metric to be composed. + /// + AbstractStringMetric internalStringMetric; + + /// + /// gets the similarity of the two strings using Monge Elkan. 
+ /// + /// + /// + /// a value between 0-1 of the similarity; the default mismatch score (0.0) when either string is null or the first string yields no tokens + public override double GetSimilarity(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + Collection firstTokens = tokeniser.Tokenize(firstWord); + Collection secondTokens = tokeniser.Tokenize(secondWord); + + double sumMatches = 0.0; + for (int i = 0; i < firstTokens.Count; i++) { + string sToken = firstTokens[i]; + double maxFound = 0.0; + for (int j = 0; j < secondTokens.Count; j++) { + string tToken = secondTokens[j]; + double found = internalStringMetric.GetSimilarity(sToken, tToken); + if (found > maxFound) { + maxFound = found; + } + } + sumMatches += maxFound; + } + return firstTokens.Count == 0 ? defaultMismatchScore : sumMatches / firstTokens.Count; // guard: 0.0 / 0 would be NaN + } + return defaultMismatchScore; + } + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + public override string GetSimilarityExplained(string firstWord, string secondWord) { + throw new NotImplementedException(); + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double firstTokens = tokeniser.Tokenize(firstWord).Count; + double secondTokens = tokeniser.Tokenize(secondWord).Count; + return + ((firstTokens + secondTokens) * firstTokens + (firstTokens + secondTokens) * secondTokens) * + estimatedTimingConstant; + } + return 0.0; + } + + /// + /// gets the un-normalised similarity measure of the metric for the given strings.
+ /// + /// + /// returns the score of the similarity measure (un-normalised) + public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) { + // todo check this is valid before use mail sam@dcs.shef.ac.uk if problematic + return GetSimilarity(firstWord, secondWord); + } + + /// + /// returns the long string identifier for the metric. + /// + public override string LongDescriptionString { get { return "Implements the Monge Elkan algorithm providing an matching style similarity measure between two strings"; } } + + /// + /// returns the string identifier for the metric. + /// + public override string ShortDescriptionString { get { return "MongeElkan"; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/SimilarityClasses/token based/OverlapCoefficient.cs b/src/SimMetrics/SimilarityClasses/token based/OverlapCoefficient.cs new file mode 100644 index 0000000..0e7cc72 --- /dev/null +++ b/src/SimMetrics/SimilarityClasses/token based/OverlapCoefficient.cs @@ -0,0 +1,156 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. 
+ * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsMetricUtilities { + using System; + using SimMetricsApi; + using SimMetricsUtilities; + + /// + /// + /// + [Serializable] + sealed public class OverlapCoefficient : AbstractStringMetric { + const double defaultMismatchScore = 0.0; + + /// + /// constructor + /// + public OverlapCoefficient() : this(new TokeniserWhitespace()) {} + + /// + /// Constructor + /// + /// the tokeniser to use should a different tokeniser be required + public OverlapCoefficient(ITokeniser tokeniserToUse) { + tokeniser = tokeniserToUse; + tokenUtilities = new TokeniserUtilities(); + } + + /// + /// a constant for calculating the estimated timing cost. 
+ /// + double estimatedTimingConstant = 0.00014F; + TokeniserUtilities tokenUtilities; + /// + /// private tokeniser for tokenisation of the query strings. + /// + ITokeniser tokeniser; + + /// + /// gets the similarity of the two strings using OverlapCoefficient + /// + /// + /// + /// a value between 0-1 of the similarity; the default mismatch score (0.0) when either string is null or yields no tokens + /// overlap_coefficient(q,r) = ( | q and r | ) / min{ | q | , | r | }. + public override double GetSimilarity(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + // CreateMergedSet is called for its side effect: it stores the unique-token counts read below + tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord)); + int smallerSetCount = Math.Min(tokenUtilities.FirstSetTokenCount, tokenUtilities.SecondSetTokenCount); + // an empty token set would otherwise yield 0 / 0.0 == NaN + return smallerSetCount == 0 ? defaultMismatchScore : tokenUtilities.CommonSetTerms() / (double)smallerSetCount; + } + return defaultMismatchScore; + } + + /// gets a div class xhtml similarity explaining the operation of the metric. + /// string 1 + /// string 2 + /// a div class html section detailing the metric operation. + public override string GetSimilarityExplained(string firstWord, string secondWord) { + throw new NotImplementedException(); + } + + /// + /// gets the estimated time in milliseconds it takes to perform a similarity timing. + /// + /// + /// + /// the estimated time in milliseconds taken to perform the similarity measure + public override double GetSimilarityTimingEstimated(string firstWord, string secondWord) { + if ((firstWord != null) && (secondWord != null)) { + double firstTokens = tokeniser.Tokenize(firstWord).Count; + double secondTokens = tokeniser.Tokenize(secondWord).Count; + return firstTokens * secondTokens * estimatedTimingConstant; + } + return 0.0; + } + + /// + /// gets the un-normalised similarity measure of the metric for the given strings.
+ /// + /// + /// returns the score of the similarity measure (un-normalised) + public override double GetUnnormalisedSimilarity(string firstWord, string secondWord) { + return GetSimilarity(firstWord, secondWord); + } + + /// + /// returns the long string identifier for the metric. + /// + public override string LongDescriptionString { + get { + return + "Implements the Overlap Coefficient algorithm providing a similarity measure between two string where it is determined to what degree a string is a subset of another"; + } + } + + /// + /// returns the string identifier for the metric. + /// + public override string ShortDescriptionString { get { return "OverlapCoefficient"; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/MathFuncs.cs b/src/SimMetrics/UtilityClasses/MathFuncs.cs new file mode 100644 index 0000000..b180126 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/MathFuncs.cs @@ -0,0 +1,118 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. 
+ * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + + /// + /// MathFuncs implements a number of handy maths functions. + /// + static public class MathFunctions { + /// + /// returns the max of three numbers. + /// + /// first number to test + /// second number to test + /// third number to test + /// the max of three numbers. + static public double MaxOf3(double firstNumber, double secondNumber, double thirdNumber) { + return Math.Max(firstNumber, Math.Max(secondNumber, thirdNumber)); + } + + /// + /// returns the max of three numbers. + /// + /// first number to test + /// second number to test + /// third number to test + /// the max of three numbers. 
+ static public int MaxOf3(int firstNumber, int secondNumber, int thirdNumber) { + return Math.Max(firstNumber, Math.Max(secondNumber, thirdNumber)); + } + + /// + /// returns the max of four numbers. + /// + /// first number to test + /// second number to test + /// third number to test + /// fourth number to test + /// the max of four numbers. + static public double MaxOf4(double firstNumber, double secondNumber, double thirdNumber, double fourthNumber) { + return Math.Max(Math.Max(firstNumber, secondNumber), Math.Max(thirdNumber, fourthNumber)); + } + + /// + /// returns the min of three numbers. + /// + /// first number to test + /// second number to test + /// third number to test + /// the min of three numbers. + static public double MinOf3(double firstNumber, double secondNumber, double thirdNumber) { + return Math.Min(firstNumber, Math.Min(secondNumber, thirdNumber)); + } + + /// + /// returns the min of three numbers. + /// + /// first number to test + /// second number to test + /// third number to test + /// the min of three numbers. + static public int MinOf3(int firstNumber, int secondNumber, int thirdNumber) { + return Math.Min(firstNumber, Math.Min(secondNumber, thirdNumber)); + } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram2.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram2.cs new file mode 100644 index 0000000..038f573 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram2.cs @@ -0,0 +1,70 @@ +#region Copyright +/* This new class in the .NET version holds a Bigram implementation. + * + * (c) Chris Parkinson 2006. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + using System.Collections.ObjectModel; + using SimMetricsApi; + + /// + /// implementaton of the Bigram tokeniser + /// + public class TokeniserQGram2 : AbstractTokeniserQGramN { + /// + /// constructor + /// + public TokeniserQGram2() { + StopWordHandler = new DummyStopTermHandler(); + TokenUtilities = new TokeniserUtilities(); + CharacterCombinationIndex = 0; + QGramLength = 2; + } + + /// + /// Return tokenized version of a string. + /// + /// input + /// tokenized version of a string + public override Collection Tokenize(string word) { + return Tokenize(word, false, QGramLength, CharacterCombinationIndex); + } + + /// + /// displays the tokenisation method. 
+ /// + public override string ShortDescriptionString { get { return "TokeniserQGram2"; } } + + /// + /// override the ToString method to give accurate information on current settings + /// + /// details of current tokeniser + public override string ToString() { + if (String.IsNullOrEmpty(SuppliedWord)) { + return string.Format("{0} : no word passed for tokenising yet.", ShortDescriptionString); + } + else { + return + string.Format("{0} - currently holding : {1}.{2}The method is using a QGram length of {3}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, Convert.ToInt32(QGramLength)); + } + } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram2Extended.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram2Extended.cs new file mode 100644 index 0000000..aa02623 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram2Extended.cs @@ -0,0 +1,59 @@ +#region Copyright +/* This new class in the .NET version holds a Bigram implementation. + * + * (c) Chris Parkinson 2006. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + using System.Collections.ObjectModel; + + /// + /// implementation of a Bigram tokeniser using extended logic + /// + public class TokeniserQGram2Extended : TokeniserQGram2 { + /// + /// Return tokenized version of a string. + /// + /// input + /// tokenized version of a string + public override Collection Tokenize(string word) { + return Tokenize(word, true, QGramLength, CharacterCombinationIndex); + } + + /// + /// displays the tokenisation method. + /// + public override string ShortDescriptionString { get { return "TokeniserQGram2Extended"; } } + + /// + /// override the ToString method to give accurate information on current settings + /// + /// details of current tokeniser + public override string ToString() { + if (String.IsNullOrEmpty(SuppliedWord)) { + return string.Format("{0} : no word passed for tokenising yet.", ShortDescriptionString); + } + else { + return + string.Format("{0} - currently holding : {1}.{2}The method is using a QGram length of {3}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, Convert.ToInt32(QGramLength)); + } + } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram3.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram3.cs new file mode 100644 index 0000000..0eb9703 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram3.cs @@ -0,0 +1,70 @@ +#region Copyright +/* This new class in the .NET version holds a Bigram implementation. + * + * (c) Chris Parkinson 2006.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + using System.Collections.ObjectModel; + using SimMetricsApi; + + /// + /// implementaton of the Bigram tokeniser + /// + public class TokeniserQGram3 : AbstractTokeniserQGramN { + /// + /// constructor + /// + public TokeniserQGram3() { + StopWordHandler = new DummyStopTermHandler(); + TokenUtilities = new TokeniserUtilities(); + CharacterCombinationIndex = 0; + QGramLength = 3; + } + + /// + /// Return tokenized version of a string. + /// + /// input + /// tokenized version of a string + public override Collection Tokenize(string word) { + return Tokenize(word, false, QGramLength, CharacterCombinationIndex); + } + + /// + /// displays the tokenisation method. 
+ /// + public override string ShortDescriptionString { get { return "TokeniserQGram3"; } } + + /// + /// override the ToString method to give accurate information on current settings + /// + /// details of current tokeniser + public override string ToString() { + if (String.IsNullOrEmpty(SuppliedWord)) { + return string.Format("{0} : no word passed for tokenising yet.", ShortDescriptionString); + } + else { + return + string.Format("{0} - currently holding : {1}.{2}The method is using a QGram length of {3}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, Convert.ToInt32(QGramLength)); + } + } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram3Extended.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram3Extended.cs new file mode 100644 index 0000000..2264f73 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserQGram3Extended.cs @@ -0,0 +1,59 @@ +#region Copyright +/* This new class in the .NET version holds a Bigram implementation. + * + * (c) Chris Parkinson 2006. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + using System.Collections.ObjectModel; + + /// + /// implementation of a Bigram tokeniser using extended logic + /// + public class TokeniserQGram3Extended : TokeniserQGram3 { + /// + /// Return tokenized version of a string. + /// + /// input + /// tokenized version of a string + public override Collection Tokenize(string word) { + return Tokenize(word, true, QGramLength, CharacterCombinationIndex); + } + + /// + /// displays the tokenisation method. + /// + public override string ShortDescriptionString { get { return "TokeniserQGram3Extended"; } } + + /// + /// override the ToString method to give accurate information on current settings + /// + /// details of current tokeniser + public override string ToString() { + if (String.IsNullOrEmpty(SuppliedWord)) { + return string.Format("{0} : no word passed for tokenising yet.", ShortDescriptionString); + } + else { + return + string.Format("{0} - currently holding : {1}.{2}The method is using a QGram length of {3}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, Convert.ToInt32(QGramLength)); + } + } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram2.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram2.cs new file mode 100644 index 0000000..9c4a928 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram2.cs @@ -0,0 +1,65 @@ +#region Copyright +/* This new class in the .NET version holds a Bigram implementation. + * + * (c) Chris Parkinson 2006.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + + /// + /// implementaton of the Sgram tokeniser + /// + public class TokeniserSGram2 : TokeniserQGram2 { + /// + /// constructor + /// + public TokeniserSGram2() : base() { + CharacterCombinationIndex = 1; + } + + /// + /// displays the tokenisation method. 
+ /// + public override string ShortDescriptionString { get { return "TokeniserSGram2"; } } + + /// + /// override the ToString method to give accurate information on current settings + /// + /// details of current tokeniser + public override string ToString() { + if (String.IsNullOrEmpty(SuppliedWord)) { + return string.Format("{0} : no word passed for tokenising yet.", ShortDescriptionString); + } + else { + if (CharacterCombinationIndex == 0) { + return + string.Format("{0} - currently holding : {1}.{2}The method is using a QGram length of {3}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, Convert.ToInt32(QGramLength)); + } + else { + return + string.Format( + "{0} - currently holding : {1}.{2}The method is using a character combination index of {3} and {4}a QGram length of {5}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, + Convert.ToInt32(CharacterCombinationIndex), Environment.NewLine, Convert.ToInt32(QGramLength)); + } + } + } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram2Extended.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram2Extended.cs new file mode 100644 index 0000000..1f4624f --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram2Extended.cs @@ -0,0 +1,65 @@ +#region Copyright +/* This new class in the .NET version holds a Bigram implementation. + * + * (c) Chris Parkinson 2006. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + + /// + /// implementation of a SGram tokeniser using extended logic + /// + public class TokeniserSGram2Extended : TokeniserQGram2Extended { + /// + /// constructor + /// + public TokeniserSGram2Extended() : base() { + CharacterCombinationIndex = 1; + } + + /// + /// displays the tokenisation method. + /// + public override string ShortDescriptionString { get { return "TokeniserSGram2Extended"; } } + + /// + /// override the ToString method to give accurate information on current settings + /// + /// details of current tokeniser + public override string ToString() { + if (String.IsNullOrEmpty(SuppliedWord)) { + return string.Format("{0} : no word passed for tokenising yet.", ShortDescriptionString); + } + else { + if (CharacterCombinationIndex == 0) { + return + string.Format("{0} - currently holding : {1}.{2}The method is using a QGram length of {3}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, Convert.ToInt32(QGramLength)); + } + else { + return + string.Format( + "{0} - currently holding : {1}.{2}The method is using a character combination index of {3} and {4}a QGram length of {5}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, + Convert.ToInt32(CharacterCombinationIndex), Environment.NewLine, Convert.ToInt32(QGramLength)); + } + } + } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram3.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram3.cs new file mode 100644 index 0000000..c00764e --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram3.cs @@ -0,0 +1,65 @@ +#region Copyright +/* This new class in the .NET version holds a Bigram implementation. 
+ * + * (c) Chris Parkinson 2006. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + + /// + /// implementaton of the Sgram tokeniser + /// + public class TokeniserSGram3 : TokeniserQGram3 { + /// + /// constructor + /// + public TokeniserSGram3() : base() { + CharacterCombinationIndex = 1; + } + + /// + /// displays the tokenisation method. 
+ /// + public override string ShortDescriptionString { get { return "TokeniserSGram3"; } } + + /// + /// override the ToString method to give accurate information on current settings + /// + /// details of current tokeniser + public override string ToString() { + if (String.IsNullOrEmpty(SuppliedWord)) { + return string.Format("{0} : no word passed for tokenising yet.", ShortDescriptionString); + } + else { + if (CharacterCombinationIndex == 0) { + return + string.Format("{0} - currently holding : {1}.{2}The method is using a QGram length of {3}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, Convert.ToInt32(QGramLength)); + } + else { + return + string.Format( + "{0} - currently holding : {1}.{2}The method is using a character combination index of {3} and {4}a QGram length of {5}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, + Convert.ToInt32(CharacterCombinationIndex), Environment.NewLine, Convert.ToInt32(QGramLength)); + } + } + } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram3Extended.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram3Extended.cs new file mode 100644 index 0000000..aa443b4 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserSGram3Extended.cs @@ -0,0 +1,65 @@ +#region Copyright +/* This new class in the .NET version holds a Bigram implementation. + * + * (c) Chris Parkinson 2006. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + + /// + /// implementation of a SGram tokeniser using extended logic + /// + public class TokeniserSGram3Extended : TokeniserQGram3Extended { + /// + /// constructor + /// + public TokeniserSGram3Extended() : base() { + CharacterCombinationIndex = 1; + } + + /// + /// displays the tokenisation method. + /// + public override string ShortDescriptionString { get { return "TokeniserSGram3Extended"; } } + + /// + /// override the ToString method to give accurate information on current settings + /// + /// details of current tokeniser + public override string ToString() { + if (String.IsNullOrEmpty(SuppliedWord)) { + return string.Format("{0} : no word passed for tokenising yet.", ShortDescriptionString); + } + else { + if (CharacterCombinationIndex == 0) { + return + string.Format("{0} - currently holding : {1}.{2}The method is using a QGram length of {3}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, Convert.ToInt32(QGramLength)); + } + else { + return + string.Format( + "{0} - currently holding : {1}.{2}The method is using a character combination index of {3} and {4}a QGram length of {5}.", + ShortDescriptionString, SuppliedWord, Environment.NewLine, + Convert.ToInt32(CharacterCombinationIndex), Environment.NewLine, Convert.ToInt32(QGramLength)); + } + } + } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserUtilities.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserUtilities.cs new file mode 100644 index 0000000..4502a22 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserUtilities.cs @@ -0,0 +1,180 @@ +#region Copyright +/* This new class in the .NET version holds utility functions for use with the + 
* various Collection token collections. + * + * (c) Chris Parkinson 2006. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + using System.Collections.ObjectModel; + + /// + /// class containing utility functions for the tokenisers to use. + /// these are in two main version + /// collections or sets + /// a collection can contain the same value multiple times ad set can only have the value once. 
+ /// + /// type for token collection + [Serializable] + public class TokeniserUtilities { + /// + /// constructor + /// + public TokeniserUtilities() { + allTokens = new Collection(); + tokenSet = new Collection(); + } + + Collection allTokens; + int firstSetTokenCount; + int firstTokenCount; + int secondSetTokenCount; + int secondTokenCount; + Collection tokenSet; + + /// + /// returns the number of common tokens from the two supplied token sets + /// + /// + public int CommonSetTerms() { + return FirstSetTokenCount + SecondSetTokenCount - tokenSet.Count; + } + + /// + /// returns number of common tokens from the two supplied token collections + /// + /// + public int CommonTerms() { + return FirstTokenCount + SecondTokenCount - allTokens.Count; + } + + /// + /// method to merge two token lists to keep all tokens + /// + /// first token list + /// second token list + /// list of all tokens + public Collection CreateMergedList(Collection firstTokens, Collection secondTokens) { + allTokens.Clear(); + firstTokenCount = firstTokens.Count; + secondTokenCount = secondTokens.Count; + MergeLists(firstTokens); + MergeLists(secondTokens); + return allTokens; + } + + /// + /// method to merge two token lists to keep only unique tokens + /// + /// first token list + /// second token list + /// list of unique tokens + public Collection CreateMergedSet(Collection firstTokens, Collection secondTokens) { + tokenSet.Clear(); + firstSetTokenCount = CalculateUniqueTokensCount(firstTokens); + secondSetTokenCount = CalculateUniqueTokensCount(secondTokens); + MergeIntoSet(firstTokens); + MergeIntoSet(secondTokens); + return tokenSet; + } + + /// + /// method to create a single token list of unique tokens + /// + /// token list to use + /// unique token list - sorted + public Collection CreateSet(Collection tokenList) { + tokenSet.Clear(); + AddUniqueTokens(tokenList); + firstTokenCount = tokenSet.Count; + secondTokenCount = 0; + return tokenSet; + } + + /// + /// method for merging 
extra token lists into the set + /// + /// token list to merge + public void MergeIntoSet(Collection firstTokens) { + AddUniqueTokens(firstTokens); + } + + /// + /// method for merging into the list + /// + /// token list to merge + public void MergeLists(Collection firstTokens) { + AddTokens(firstTokens); + } + + void AddTokens(Collection tokenList) { + foreach (T token in tokenList) { + allTokens.Add(token); + } + } + + void AddUniqueTokens(Collection tokenList) { + foreach (T token in tokenList) { + if (!tokenSet.Contains(token)) { + tokenSet.Add(token); + } + } + } + + int CalculateUniqueTokensCount(Collection tokenList) { + Collection myList = new Collection(); + foreach (T token in tokenList) { + if (!myList.Contains(token)) { + myList.Add(token); + } + } + return myList.Count; + } + + /// + /// token count from first token list + /// + public int FirstSetTokenCount { get { return firstSetTokenCount; } } + + /// + /// token count from first token list + /// + public int FirstTokenCount { get { return firstTokenCount; } } + + /// + /// merged token List. unique tokens only + /// + public Collection MergedTokens { get { return allTokens; } } + + /// + /// token count from second token list + /// + public int SecondSetTokenCount { get { return secondSetTokenCount; } } + + /// + /// token count from second token list + /// + public int SecondTokenCount { get { return secondTokenCount; } } + + /// + /// merged token List. 
unique tokens only + /// + public Collection TokenSet { get { return tokenSet; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserWhitespace.cs b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserWhitespace.cs new file mode 100644 index 0000000..bad5497 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Tokenisers/TokeniserWhitespace.cs @@ -0,0 +1,144 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. 
Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + using System.Collections.ObjectModel; + using SimMetricsApi; + + /// + /// implements a simple whitespace tokeniser. + /// + [Serializable] + sealed public class TokeniserWhitespace : ITokeniser { + /// + /// default constructor + /// + public TokeniserWhitespace() { + stopWordHandler = new DummyStopTermHandler(); + tokenUtilities = new TokeniserUtilities(); + } + + /// + /// private delimiters for white space within a string. + /// + string delimiters = "\r\n\t \x00A0"; + /// + /// stopWordHandler used by the tokenisation. + /// + ITermHandler stopWordHandler; + /// + /// private utilities for token manipulation + /// + TokeniserUtilities tokenUtilities; + + /// + /// Return tokenized version of a string. 
+ /// + /// word to tokenize + /// tokenized version of a string + public Collection Tokenize(string word) { + Collection returnVect = new Collection(); + if (word != null) { + int nextGapPos; + for (int curPos = 0; curPos < word.Length; curPos = nextGapPos) { + char ch = word[curPos]; + if (Char.IsWhiteSpace(ch)) { + curPos++; + } + nextGapPos = word.Length; + for (int i = 0; i < delimiters.Length; i++) { + int testPos = word.IndexOf(delimiters[i], curPos); + if (testPos < nextGapPos && testPos != -1) { + nextGapPos = testPos; + } + } + + string term = word.Substring(curPos, (nextGapPos) - (curPos)); + if (!stopWordHandler.IsWord(term)) { + returnVect.Add(term); + } + } + } + return returnVect; + } + + /// + /// Return tokenized set of a string. + /// + /// input + /// tokenized set of a string + public Collection TokenizeToSet(string word) { + if (word != null) { + return tokenUtilities.CreateSet(Tokenize(word)); + } + return null; + } + + /// + /// displays the delimiters used. + /// + public string Delimiters { get { return delimiters; } } + + /// + /// displays the tokenisation method. + /// + public string ShortDescriptionString { get { return "TokeniserWhitespace"; } } + + /// + /// gets the stop word handler used. + /// + public ITermHandler StopWordHandler { get { return stopWordHandler; } set { stopWordHandler = value; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/Wordhandlers/DummyStopTermHandler.cs b/src/SimMetrics/UtilityClasses/Wordhandlers/DummyStopTermHandler.cs new file mode 100644 index 0000000..aacaac4 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/Wordhandlers/DummyStopTermHandler.cs @@ -0,0 +1,99 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. 
+ * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion +
+namespace SimMetricsUtilities {
+    using System.Text;
+    using SimMetricsApi;
+
+    /// <summary>
+    /// Stop word handler that does nothing: it stores no words and never
+    /// reports a term as a stop word.
+    /// </summary>
+    public sealed class DummyStopTermHandler : ITermHandler {
+        /// <summary>
+        /// Accepts a word and ignores it.
+        /// </summary>
+        /// <param name="termToAdd">the word to add (ignored)</param>
+        public void AddWord(string termToAdd) {
+        }
+
+        /// <summary>
+        /// Tests whether a term is a stop word.
+        /// </summary>
+        /// <param name="termToTest">the term to test (ignored)</param>
+        /// <returns>always false</returns>
+        public bool IsWord(string termToTest) {
+            return false;
+        }
+
+        /// <summary>
+        /// Accepts a word to remove and ignores it.
+        /// </summary>
+        /// <param name="termToRemove">the stop word to remove (ignored)</param>
+        public void RemoveWord(string termToRemove) {
+        }
+
+        /// <summary>
+        /// Gets the number of stop words held; always 0.
+        /// </summary>
+        public int NumberOfWords {
+            get { return 0; }
+        }
+
+        /// <summary>
+        /// Gets the short name of this handler.
+        /// </summary>
+        public string ShortDescriptionString {
+            get { return "DummyStopTermHandler"; }
+        }
+
+        /// <summary>
+        /// Gets the stop words as a buffer; a new, empty StringBuilder on every read.
+        /// </summary>
+        public StringBuilder WordsAsBuffer {
+            get { return new StringBuilder(); }
+        }
+    }
+}
\ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/costfunctions/AffineGapRange1To0Multiplier1Over3.cs b/src/SimMetrics/UtilityClasses/costfunctions/AffineGapRange1To0Multiplier1Over3.cs new file mode 100644 index 0000000..ba7e2e8 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/costfunctions/AffineGapRange1To0Multiplier1Over3.cs @@ -0,0 +1,98 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET.
+ * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion +
+namespace SimMetricsUtilities {
+    using System;
+    using SimMetricsApi;
+
+    /// <summary>
+    /// Affine gap cost function: 1 for opening a gap plus 0.3333333 for every
+    /// further position the gap covers.
+    /// </summary>
+    [Serializable]
+    public sealed class AffineGapRange1To0Multiplier1Over3 : AbstractAffineGapCost {
+        // Cost charged for the first position of a gap.
+        const int gapStartCost = 1;
+        // Cost returned when the indices describe no gap at all.
+        const int noGapCost = 0;
+
+        /// <summary>
+        /// Gets the cost of a gap.
+        /// </summary>
+        /// <param name="textToGap">the string to get the cost of a gap (not read by this implementation)</param>
+        /// <param name="stringIndexStartGap">the index within the string to test a start gap from</param>
+        /// <param name="stringIndexEndGap">the index within the string to test a end gap to</param>
+        /// <returns>
+        /// 0 when the start index is not below the end index; otherwise
+        /// 1 + (end - 1 - start) * 0.3333333.
+        /// </returns>
+        public override double GetCost(string textToGap, int stringIndexStartGap, int stringIndexEndGap) {
+            bool describesGap = stringIndexStartGap < stringIndexEndGap;
+            if (!describesGap) {
+                return noGapCost;
+            }
+
+            // Positions covered by the gap beyond the first one.
+            int extensionLength = stringIndexEndGap - 1 - stringIndexStartGap;
+            return gapStartCost + extensionLength * 0.3333333F;
+        }
+
+        /// <summary>
+        /// returns the maximum possible cost.
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): this is fixed at 1.0 although GetCost returns more than 1
+        /// for gaps covering more than one position - confirm what callers expect.
+        /// </remarks>
+        public override double MaxCost {
+            get { return 1.0; }
+        }
+
+        /// <summary>
+        /// returns the minimum possible cost.
+        /// </summary>
+        public override double MinCost {
+            get { return 0.0; }
+        }
+
+        /// <summary>
+        /// returns the name of the cost function.
+        /// </summary>
+        public override string ShortDescriptionString {
+            get { return "AffineGapRange1To0Multiplier1Over3"; }
+        }
+    }
+}
\ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/costfunctions/AffineGapRange5To0Multiplier1.cs b/src/SimMetrics/UtilityClasses/costfunctions/AffineGapRange5To0Multiplier1.cs new file mode 100644 index 0000000..ccc1cae --- /dev/null +++ b/src/SimMetrics/UtilityClasses/costfunctions/AffineGapRange5To0Multiplier1.cs @@ -0,0 +1,102 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter.
+ * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion +
+namespace SimMetricsUtilities {
+    using System;
+    using SimMetricsApi;
+
+    /// <summary>
+    /// Affine gap cost function: 5 for opening a gap plus 1 for every further
+    /// position the gap covers.
+    /// </summary>
+    [Serializable]
+    public sealed class AffineGapRange5To0Multiplier1 : AbstractAffineGapCost {
+        /// <summary>
+        /// Cost charged for the first position of a gap; also reported as MaxCost.
+        /// </summary>
+        const int gapStartCost = 5;
+
+        // Cost returned when the indices describe no gap at all; also reported as MinCost.
+        const int noGapCost = 0;
+
+        /// <summary>
+        /// Gets the cost of a gap.
+        /// </summary>
+        /// <param name="textToGap">the string to get the cost of a gap (not read by this implementation)</param>
+        /// <param name="stringIndexStartGap">the index within the string to test a start gap from</param>
+        /// <param name="stringIndexEndGap">the index within the string to test a end gap to</param>
+        /// <returns>
+        /// 0 when the start index is not below the end index; otherwise 5 + (end - 1 - start).
+        /// </returns>
+        public override double GetCost(string textToGap, int stringIndexStartGap, int stringIndexEndGap) {
+            bool describesGap = stringIndexStartGap < stringIndexEndGap;
+            if (!describesGap) {
+                return noGapCost;
+            }
+
+            // One unit per position covered by the gap beyond the first.
+            return gapStartCost + (stringIndexEndGap - 1 - stringIndexStartGap);
+        }
+
+        /// <summary>
+        /// returns the maximum possible cost.
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): this is fixed at 5 although GetCost returns more than 5
+        /// for gaps covering more than one position - confirm what callers expect.
+        /// </remarks>
+        public override double MaxCost {
+            get { return gapStartCost; }
+        }
+
+        /// <summary>
+        /// returns the minimum possible cost.
+        /// </summary>
+        public override double MinCost {
+            get { return noGapCost; }
+        }
+
+        /// <summary>
+        /// returns the name of the cost function.
+        /// </summary>
+        public override string ShortDescriptionString {
+            get { return "AffineGapRange5To0Multiplier1"; }
+        }
+    }
+}
\ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/costfunctions/SubCostRange0To1.cs b/src/SimMetrics/UtilityClasses/costfunctions/SubCostRange0To1.cs new file mode 100644 index 0000000..1b6e657 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/costfunctions/SubCostRange0To1.cs @@ -0,0 +1,97 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter.
+ * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion +
+namespace SimMetricsUtilities {
+    using System;
+    using SimMetricsApi;
+
+    /// <summary>
+    /// Substitution cost function where d(i,j) = 1 if i does not equal j, 0 if i equals j.
+    /// </summary>
+    [Serializable]
+    public sealed class SubCostRange0To1 : AbstractSubstitutionCost {
+        // Cost of substituting one character for a different one; also reported as MaxCost.
+        const int differentCharacterCost = 1;
+        // Cost when both characters are the same; also reported as MinCost.
+        const int sameCharacterCost = 0;
+
+        /// <summary>
+        /// Gets the cost between two characters: 1 if they differ, 0 if they are equal.
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): the indices are not range-checked here, unlike in
+        /// SubCostRange1ToMinus2 and SubCostRange5ToMinus3 - an index outside either
+        /// word makes the string indexer throw.
+        /// </remarks>
+        /// <param name="firstWord">the string1 to evaluate the cost</param>
+        /// <param name="firstWordIndex">the index within the string1 to test</param>
+        /// <param name="secondWord">the string2 to evaluate the cost</param>
+        /// <param name="secondWordIndex">the index within the string2 to test</param>
+        /// <returns>1 if the characters differ, 0 if they are equal, 0 if either word is null</returns>
+        public override double GetCost(string firstWord, int firstWordIndex, string secondWord, int secondWordIndex) {
+            if (firstWord == null || secondWord == null) {
+                return 0.0;
+            }
+
+            bool sameCharacter = firstWord[firstWordIndex] == secondWord[secondWordIndex];
+            if (sameCharacter) {
+                return sameCharacterCost;
+            }
+            return differentCharacterCost;
+        }
+
+        /// <summary>
+        /// returns the maximum possible cost.
+        /// </summary>
+        public override double MaxCost {
+            get { return differentCharacterCost; }
+        }
+
+        /// <summary>
+        /// returns the minimum possible cost.
+        /// </summary>
+        public override double MinCost {
+            get { return sameCharacterCost; }
+        }
+
+        /// <summary>
+        /// returns the name of the cost function.
+        /// </summary>
+        public override string ShortDescriptionString {
+            get { return "SubCostRange0To1"; }
+        }
+    }
+}
\ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/costfunctions/SubCostRange1ToMinus2.cs b/src/SimMetrics/UtilityClasses/costfunctions/SubCostRange1ToMinus2.cs new file mode 100644 index 0000000..26bcd9b --- /dev/null +++ b/src/SimMetrics/UtilityClasses/costfunctions/SubCostRange1ToMinus2.cs @@ -0,0 +1,107 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between string Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. + * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S.
Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion + +namespace SimMetricsUtilities { + using System; + using SimMetricsApi; + + /// + /// implements a substitution cost function where d(i,j) = 1 if i does not equal j, -2 if i equals j. + /// + [Serializable] + sealed public class SubCostRange1ToMinus2 : AbstractSubstitutionCost { + /// + /// + /// + const int charExactMatchScore = 1; + + const int charMismatchMatchScore = -2; + + /// + /// get cost between characters where d(i,j) = 1 if i does not equal j, -2 if i equals j. + /// + /// the string1 to evaluate the cost + /// the index within the string1 to test + /// the string2 to evaluate the cost + /// the index within the string2 to test + /// the cost of a given subsitution d(i,j) where d(i,j) = 1 if i!=j, -2 if i==j + public override double GetCost(string firstWord, int firstWordIndex, string secondWord, int secondWordIndex) { + if ((firstWord != null) && (secondWord != null)) { + if (firstWord.Length <= firstWordIndex || firstWordIndex < 0) { + return charMismatchMatchScore; + } + if (secondWord.Length <= secondWordIndex || secondWordIndex < 0) { + return charMismatchMatchScore; + } + return firstWord[firstWordIndex] != secondWord[secondWordIndex] ? 
charMismatchMatchScore : charExactMatchScore; + } + return charMismatchMatchScore; + } + + /// + /// returns the maximum possible cost. + /// + public override double MaxCost { get { return charExactMatchScore; } } + + /// + /// returns the minimum possible cost. + /// + public override double MinCost { get { return charMismatchMatchScore; } } + + /// + /// returns the name of the cost function. + /// + public override string ShortDescriptionString { get { return "SubCostRange1ToMinus2"; } } + } +} \ No newline at end of file diff --git a/src/SimMetrics/UtilityClasses/costfunctions/SubCostRange5ToMinus3.cs b/src/SimMetrics/UtilityClasses/costfunctions/SubCostRange5ToMinus3.cs new file mode 100644 index 0000000..7f7da12 --- /dev/null +++ b/src/SimMetrics/UtilityClasses/costfunctions/SubCostRange5ToMinus3.cs @@ -0,0 +1,162 @@ +#region Copyright +/* + * The original .NET implementation of the SimMetrics library is taken from the Java + * source and converted to NET using the Microsoft Java converter. + * It is not clear who made the initial convertion to .NET. + * + * This updated version has started with the 1.0 .NET release of SimMetrics and used + * FxCop (http://www.gotdotnet.com/team/fxcop/) to highlight areas where changes needed + * to be made to the converted code. + * + * this version with updates Copyright (c) 2006 Chris Parkinson. + * + * For any queries on the .NET version please contact me through the + * sourceforge web address. + * + * SimMetrics - SimMetrics is a java library of Similarity or Distance + * Metrics, e.g. Levenshtein Distance, that provide float based similarity + * measures between String Data. All metrics return consistant measures + * rather than unbounded similarity scores. + * + * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1 + * + * Please Feel free to contact me about this library, I would appreciate + * knowing quickly what you wish to use it for and any criticisms/comments + * upon the SimMetric library. 
+ * + * email: s.chapman@dcs.shef.ac.uk + * www: http://www.dcs.shef.ac.uk/~sam/ + * www: http://www.dcs.shef.ac.uk/~sam/stringmetrics.html + * + * address: Sam Chapman, + * Department of Computer Science, + * University of Sheffield, + * Sheffield, + * S. Yorks, + * S1 4DP + * United Kingdom, + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#endregion +
+namespace SimMetricsUtilities {
+    using System;
+    using System.Collections.ObjectModel;
+    using SimMetricsApi;
+
+    /// <summary>
+    /// SubCostRange5ToMinus3 implements a cost function as used in Monge Elkan, scoring an
+    /// exact match, an approximate match (both characters belong to the same set of similar
+    /// characters) or no match.
+    /// The approximate sets are {dt} {gj} {lr} {mn} {bpv} {aeiou} {,.}
+    /// </summary>
+    [Serializable]
+    sealed public class SubCostRange5ToMinus3 : AbstractSubstitutionCost {
+        const int charApproximateMatchScore = 3;
+        const int charExactMatchScore = 5;
+        const int charMismatchMatchScore = -3;
+
+        /// <summary>
+        /// constructor.
+        /// Sets up the matching sets; an approximate match scores +3
+        /// for pairings in {dt} {gj} {lr} {mn} {bpv} {aeiou} {,.}.
+        /// </summary>
+        public SubCostRange5ToMinus3() {
+            approx = new Collection<string>[] {
+                new Collection<string> { "d", "t" },
+                new Collection<string> { "g", "j" },
+                new Collection<string> { "l", "r" },
+                new Collection<string> { "m", "n" },
+                new Collection<string> { "b", "p", "v" },
+                new Collection<string> { "a", "e", "i", "o", "u" },
+                new Collection<string> { ",", "." }
+            };
+        }
+
+        /// <summary>
+        /// approximate character sets, one collection of single-character strings per set.
+        /// </summary>
+        Collection<string>[] approx;
+
+        /// <summary>
+        /// get cost between characters where
+        /// d(i,j) = charExactMatchScore if i equals j,
+        /// charApproximateMatchScore if i approximately equals j or
+        /// charMismatchMatchScore if i does not equal j.
+        /// </summary>
+        /// <remarks>
+        /// The exact comparison is case-sensitive; the approximate comparison lower-cases both
+        /// characters first, so two characters differing only in case score as approximate
+        /// when they belong to one of the sets, and as a mismatch otherwise.
+        /// </remarks>
+        /// <param name="firstWord">the string1 to evaluate the cost</param>
+        /// <param name="firstWordIndex">the index within the string1 to test</param>
+        /// <param name="secondWord">the string2 to evaluate the cost</param>
+        /// <param name="secondWordIndex">the index within the string2 to test</param>
+        /// <returns>
+        /// the cost of a given subsitution d(i,j) as defined above; the mismatch score when
+        /// either word is null or either index lies outside its word
+        /// </returns>
+        public override double GetCost(String firstWord, int firstWordIndex, String secondWord, int secondWordIndex) {
+            if ((firstWord != null) && (secondWord != null)) {
+                if (firstWord.Length <= firstWordIndex || firstWordIndex < 0) {
+                    return charMismatchMatchScore;
+                }
+                if (secondWord.Length <= secondWordIndex || secondWordIndex < 0) {
+                    return charMismatchMatchScore;
+                }
+                if (firstWord[firstWordIndex] == secondWord[secondWordIndex]) {
+                    return charExactMatchScore;
+                }
+
+                // Not identical: look for a set that holds both characters, ignoring case.
+                string si = firstWord[firstWordIndex].ToString().ToLowerInvariant();
+                string ti = secondWord[secondWordIndex].ToString().ToLowerInvariant();
+                for (int i = 0; i < approx.Length; i++) {
+                    if (approx[i].Contains(si) && approx[i].Contains(ti)) {
+                        return charApproximateMatchScore;
+                    }
+                }
+            }
+            return charMismatchMatchScore;
+        }
+
+        /// <summary>
+        /// returns the maximum possible cost.
+        /// </summary>
+        public override double MaxCost { get { return charExactMatchScore; } }
+
+        /// <summary>
+        /// returns the minimum possible cost.
+        /// </summary>
+        public override double MinCost { get { return charMismatchMatchScore; } }
+
+        /// <summary>
+        /// returns the name of the cost function.
+        /// </summary>
+        public override String ShortDescriptionString { get { return "SubCostRange5ToMinus3"; } }
+    }
+}
\ No newline at end of file diff --git a/src/Source/ControlClickManager.cs b/src/Source/ControlClickManager.cs new file mode 100644 index 0000000..30cf2f2 --- /dev/null +++ b/src/Source/ControlClickManager.cs @@ -0,0 +1,221 @@ +using System; +using System.Drawing; +using System.Runtime.InteropServices; +using System.Windows.Forms; +using ASCompletion.Completion; +using ASCompletion.Context; +using FlashDevelop; +using PluginCore.Controls; +using PluginCore.Managers; +using PluginCore.Utilities; +using ScintillaNet; + +namespace FindFilesPlugin +{ + class ControlClickManager + { + private const int CLICK_AREA = 4; //pixels + + private ScintillaControl sciControl; + private Word currentWord; + private Timer timer; + private POINT clickedPoint = new POINT(); + + #region MouseHook definitions + + public delegate int HookProc(int nCode, IntPtr wParam, IntPtr lParam); + private int hHook = 0; + private const int WH_MOUSE = 7; +// ReSharper disable InconsistentNaming + private HookProc SafeHookProc; + + [StructLayout(LayoutKind.Sequential)] + public class POINT + { + public int x; + public int y; + } + + [StructLayout(LayoutKind.Sequential)] + public class MouseHookStruct + { + public POINT pt; + public int hwnd; + public int wHitTestCode; + public int dwExtraInfo; + } +// ReSharper restore InconsistentNaming + + [DllImport("user32.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)] + public static extern int SetWindowsHookEx(int idHook, HookProc lpfn, IntPtr hInstance, int threadId); + +
[DllImport("user32.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)] + public static extern bool UnhookWindowsHookEx(int idHook); + + [DllImport("user32.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)] + public static extern int CallNextHookEx(int idHook, int nCode, IntPtr wParam, IntPtr lParam); + + #endregion + + public ControlClickManager() + { + timer = new Timer(); + timer.Interval = 10; + timer.Tick += GoToDeclaration; + } + + private void GoToDeclaration(object sender, EventArgs e) + { + timer.Stop(); + SetCurrentWord(null); + ASComplete.DeclarationLookup(sciControl); + } + + public ScintillaControl SciControl + { + get { return sciControl; } + set + { + if (hHook == 0) + { + SafeHookProc = new HookProc(MouseHookProc); + #pragma warning disable 618,612 + hHook = SetWindowsHookEx(WH_MOUSE, SafeHookProc, (IntPtr)0, AppDomain.GetCurrentThreadId()); + #pragma warning restore 618,612 + } + sciControl = value; + } + } + + public int MouseHookProc(int nCode, IntPtr wParam, IntPtr lParam) + { + if (nCode >= 0 && sciControl != null) + { + //TraceManager.Add(wParam.ToString()); + MouseHookStruct hookStruct = (MouseHookStruct)Marshal.PtrToStructure(lParam, typeof(MouseHookStruct)); + if (wParam == (IntPtr) 513) //mouseDown + { + clickedPoint.x = hookStruct.pt.x; + clickedPoint.y = hookStruct.pt.y; + } + + if (Control.ModifierKeys == Keys.Control) + { + if (wParam == (IntPtr) 514) //mouseUp + { + if (currentWord != null) + { + if (!timer.Enabled) + timer.Start(); + } + } + else + { + if (((Control.MouseButtons & MouseButtons.Left) > 0)) + { + int dx = Math.Abs(clickedPoint.x - hookStruct.pt.x); + int dy = Math.Abs(clickedPoint.y - hookStruct.pt.y); + if (currentWord != null && dx > CLICK_AREA || dy > CLICK_AREA) + SetCurrentWord(null); + } + else + { + Point globalPoint = new Point(hookStruct.pt.x, hookStruct.pt.y); + Point localPoint = sciControl.PointToClient(globalPoint); + ProcessMouseMove(localPoint); + } + } 
+ } + else + { + if (currentWord != null) + SetCurrentWord(null); + } + } + return CallNextHookEx(hHook, nCode, wParam, lParam); + } + + private void ProcessMouseMove(Point point) + { + int position = sciControl.PositionFromPointClose(point.X, point.Y); + if (position < 0) + { + SetCurrentWord(null); + return; + } + + if (ASContext.Context.IsFileValid) + { + Word word = new Word(); + word.StartPos = sciControl.WordStartPosition(position, true); + word.EndPos = sciControl.WordEndPosition(position, true); + + ASResult result = ASComplete.GetExpressionType(sciControl, word.EndPos); + if (!result.IsNull()) + { + SetCurrentWord(word); + //string text = ASComplete.GetToolTipText(result); + //if (text != null) + // UITools.Tip.ShowAtMouseLocation(text); + } + else + { + SetCurrentWord(null); + } + } + } + + private void SetCurrentWord(Word word) + { + if (Word.Equals(word, currentWord)) + return; + + if (currentWord != null) + UnHighlight(currentWord); + + currentWord = word; + + if (currentWord != null) + Highlight(currentWord); + + } + + private void UnHighlight(Word word) + { + sciControl.CursorType = -1; + Int32 mask = 1 << sciControl.StyleBits; + sciControl.StartStyling(word.StartPos, mask); + sciControl.SetStyling(word.EndPos - word.StartPos, 0); + } + + private void Highlight(Word word) + { + sciControl.CursorType = 8; + Int32 mask = 1 << sciControl.StyleBits; + ScintillaNet.Configuration.Language language + = Globals.MainForm.SciConfig.GetLanguage(sciControl.ConfigurationLanguage); + sciControl.SetIndicStyle(0, (Int32)ScintillaNet.Enums.IndicatorStyle.RoundBox); + sciControl.SetIndicFore(0, language.editorstyle.HighlightBackColor); + sciControl.StartStyling(word.StartPos, mask); + sciControl.SetStyling(word.EndPos - word.StartPos, mask); + } + } + + class Word + { + public static bool Equals(Word word1, Word word2) + { + if (word1 == null && word2 == null) + return true; + if (word1 == null || word2 == null) + return false; + return word1.StartPos == 
word2.StartPos + && word1.EndPos == word2.EndPos; + } + + public int StartPos; + public int EndPos; + } +} + + diff --git a/src/Source/HiPerfTimer.cs b/src/Source/HiPerfTimer.cs new file mode 100644 index 0000000..1496356 --- /dev/null +++ b/src/Source/HiPerfTimer.cs @@ -0,0 +1,64 @@ +using System; +using System.Runtime.InteropServices; +using System.ComponentModel; +using System.Threading; + +namespace Win32 +{ + internal class HiPerfTimer + { + [DllImport("Kernel32.dll")] + private static extern bool QueryPerformanceCounter( + out long lpPerformanceCount); + + [DllImport("Kernel32.dll")] + private static extern bool QueryPerformanceFrequency( + out long lpFrequency); + + private long startTime, stopTime; + private long freq; + + // Constructor + + public HiPerfTimer() + { + startTime = 0; + stopTime = 0; + + if (QueryPerformanceFrequency(out freq) == false) + { + // high-performance counter not supported + + throw new Win32Exception(); + } + } + + // Start the timer + + public void Start() + { + // lets do the waiting threads there work + + Thread.Sleep(0); + + QueryPerformanceCounter(out startTime); + } + + // Stop the timer + + public void Stop() + { + QueryPerformanceCounter(out stopTime); + } + + // Returns the duration of the timer (in seconds) + + public double Duration + { + get + { + return (double)(stopTime - startTime) / (double)freq; + } + } + } +} \ No newline at end of file diff --git a/src/Source/PluginMain.cs b/src/Source/PluginMain.cs new file mode 100644 index 0000000..28135a4 --- /dev/null +++ b/src/Source/PluginMain.cs @@ -0,0 +1,322 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Windows.Forms; +using System.ComponentModel; +using PluginCore.Utilities; +using PluginCore.Managers; +using PluginCore.Helpers; +using PluginCore; + +namespace FindFilesPlugin +{ + public class PluginMain : IPlugin + { + private const String PLUGIN_NAME = "FindFiles"; + private const String PLUGIN_GUID = 
"ac04a177-f578-47d7-87f1-0cbc0f834446"; + private const String PLUGIN_HELP = "www.flashdevelop.org/community/"; + private const String PLUGIN_AUTH = "Canab & Sam Batista"; + private const String SETTINGS_FILE = "Settings.fdb"; + private const String PLUGIN_DESC = "FindFiles Plugin"; + + private String settingFilename; + public Settings settingObject; + + public static List cachedFiles = new List(); + + private ControlClickManager controlClickManager; + + private ToolStripMenuItem findFilesMenuItem = null; + private ToolStripMenuItem quickOutlineMenuItem = null; + + #region Required Properties + + /// + /// For FD4 Compatibility + /// + public Int32 Api + { + get { return 1; } + } + /// + /// Name of the plugin + /// + public String Name + { + get { return PLUGIN_NAME; } + } + + /// + /// GUID of the plugin + /// + public String Guid + { + get { return PLUGIN_GUID; } + } + + /// + /// Author of the plugin + /// + public String Author + { + get { return PLUGIN_AUTH; } + } + + /// + /// Description of the plugin + /// + public String Description + { + get { return PLUGIN_DESC; } + } + + /// + /// Web address for help + /// + public String Help + { + get { return PLUGIN_HELP; } + } + + /// + /// Object that contains the settings + /// + [Browsable(false)] + public Object Settings + { + get { return settingObject; } + } + + #endregion + + #region Required Methods + + /// + /// Initializes the plugin + /// + public void Initialize() + { + InitBasics(); + LoadSettings(); + AddEventHandlers(); + CreateMenuItems(); + + if (settingObject.CtrlClickEnabled) + controlClickManager = new ControlClickManager(); + } + + /// + /// Disposes the plugin + /// + public void Dispose() + { + SaveSettings(); + } + + public static void ClearCachedFiles() + { + PluginMain.cachedFiles = new List(); + FindFilesForm.cashedFiles = new List(); + } + + /// + /// Handles the incoming events + /// + public void HandleEvent(Object sender, NotifyEvent e, HandlingPriority prority) + { + if (e.Type == 
EventType.FileSwitch) + { + if (controlClickManager != null && PluginBase.MainForm.CurrentDocument != null) + controlClickManager.SciControl = PluginBase.MainForm.CurrentDocument.SciControl; + } + } + + #endregion + + #region Custom Methods + + /// + /// Initializes important variables + /// + public void InitBasics() + { + String dataPath = Path.Combine(PathHelper.DataDir, PLUGIN_NAME); + if (!Directory.Exists(dataPath)) + Directory.CreateDirectory(dataPath); + settingFilename = Path.Combine(dataPath, SETTINGS_FILE); + } + + public void AddEventHandlers() + { + EventManager.AddEventHandler(this, EventType.FileSwitch); + } + + public void CreateMenuItems() + { + ToolStripMenuItem menu = (ToolStripMenuItem)PluginBase.MainForm.FindMenuItem("SearchMenu"); + + findFilesMenuItem = new ToolStripMenuItem("Find Files", + PluginBase.MainForm.FindImage("209"), + new EventHandler(ShowResourceForm), + settingObject.OpenResourceShortcut); + + quickOutlineMenuItem = new ToolStripMenuItem("Quick Outline", + PluginBase.MainForm.FindImage("315|16|0|0"), + new EventHandler(ShowOutlineForm), + settingObject.QuickOutlineShortcut); + + PluginBase.MainForm.RegisterShortcutItem("SearchMenu.FindFiles", findFilesMenuItem); + PluginBase.MainForm.RegisterShortcutItem("SearchMenu.QuickOutline", quickOutlineMenuItem); + + menu.DropDownItems.Insert(3, quickOutlineMenuItem); + menu.DropDownItems.Insert(7, findFilesMenuItem); + } + + public void ShowResourceForm(object sender, EventArgs e) + { + new FindFilesForm(this).ShowDialog(); + } + + private void ShowOutlineForm(object sender, EventArgs e) + { + new QuickOutlineForm(this).ShowDialog(); + } + + public void LoadSettings() + { + if (File.Exists(settingFilename)) + { + try + { + settingObject = new Settings(); + settingObject = (Settings) ObjectSerializer.Deserialize(settingFilename, settingObject); + } + catch + { + settingObject = new Settings(); + SaveSettings(); + } + } + else + { + settingObject = new Settings(); + SaveSettings(); + } + } 
+ + public void SaveSettings() + { + ObjectSerializer.Serialize(settingFilename, settingObject); + } + + public List GetProjectDirectory() + { + List folders = new List(); + + // Check if we have a project open. + if ((settingObject.SearchProject || settingObject.SearchInProjectOnly) && PluginBase.CurrentProject != null) + { + String projectFolder = Path.GetDirectoryName(PluginBase.CurrentProject.ProjectPath); + folders.Add(projectFolder); + } + + return folders; + } + + public List GetSearchDirectories() + { + List folders = new List(); + // Lots of things can go wrong here. Missing Permissions for example. + try + { + // Check if we have a project open. + if ((settingObject.SearchProject || settingObject.SearchInProjectOnly) && PluginBase.CurrentProject != null) + { + bool bIncludeProjectFolders = true; + String projectFolder = Path.GetDirectoryName(PluginBase.CurrentProject.ProjectPath); + + if (settingObject.SearchDirectory != settingObject.DefaultEmptyString) + { + if (settingObject.SearchDirectory.StartsWith(projectFolder)) + bIncludeProjectFolders = false; + } + + if (bIncludeProjectFolders) + { + folders.Add(projectFolder); + foreach (String path in PluginBase.CurrentProject.SourcePaths) + { + if (Path.IsPathRooted(path)) + { + folders.Add(path); + } + else + { + String folder = Path.GetFullPath(Path.Combine(projectFolder, path)); + if (!folder.StartsWith(projectFolder)) + folders.Add(folder); + } + } + } + } + + // A directory was specified, lets get the underlying folders there! + if (settingObject.SearchDirectory != settingObject.DefaultEmptyString) + { + string[] directories = settingObject.SearchDirectory.Split(new char[] { ',', ';' }); + if (directories.Length > 0 && directories[0] != settingObject.DefaultEmptyString) + { + foreach (string dir in directories) + { + if (Directory.Exists(dir)) + folders.Add(dir); + else + MessageBox.Show("Path: \"" + dir + "\" not found. 
Please check your Find Files settings.", "Directory Not Found", MessageBoxButtons.OK); + } + } + } + } + catch (System.Exception ex) + { + MessageBox.Show(ex.Message, "Exception!", MessageBoxButtons.OK); + settingObject.SearchDirectory = settingObject.DefaultEmptyString; + Dispose(); + } + + return folders; + } + + public List GetFiles(bool inProjectOnly = false) + { + if (cachedFiles.Count == 0) + { + List folders = inProjectOnly ? GetProjectDirectory() : GetSearchDirectories(); + foreach (String folder in folders) + { + try + { + string[] filters = settingObject.SearchFilter.Split(new char[] { ',', ';', '|' }); + foreach (string filter in filters) + { + cachedFiles.AddRange(Directory.GetFiles(folder, filter, SearchOption.AllDirectories)); + } + } + catch (ArgumentException) + { + MessageBox.Show("Search Filter: \"" + settingObject.SearchFilter + "\" is invalid. Visit http://msdn.microsoft.com/en-us/library/ms143316.aspx for more information.", "Invalid Search Filter", MessageBoxButtons.OK); + settingObject.SearchFilter = settingObject.DefaultSearchFilter; + return new List(); + } + catch (DirectoryNotFoundException) + { + // Do nothing, did not find a folder, just don't include it. + } + } + } + + return cachedFiles; + } + #endregion + + } +} diff --git a/src/Source/SearchManager.cs b/src/Source/SearchManager.cs new file mode 100644 index 0000000..639ecda --- /dev/null +++ b/src/Source/SearchManager.cs @@ -0,0 +1,195 @@ +using System; +using System.Collections.Generic; +using System.Text; +using System.Windows.Forms; +using System.IO; +using SimMetricsMetricUtilities; +using PluginCore; +using System.Drawing; + +namespace FindFilesPlugin +{ + class SearchManager + { + // Information regarding the rest of the plugin. + private DataGridView dataGridPanel; + private PluginMain plugin; + private Settings pluginSettings; + // Keep track of added files to eliminate dupes in file list display. 
+ private Dictionary eliminateDupes; + + // A list so that we can properly handle files with the same name. + public List fileList = new List(); + + #region SimMetrics Classes + + SmithWaterman smithWaterman = new SmithWaterman(); + SmithWatermanGotoh smithWatermanGotoh = new SmithWatermanGotoh(); + Jaro jaro = new Jaro(); + JaroWinkler jaroWinkler = new JaroWinkler(); + #endregion + + public SearchManager(PluginMain plugin, DataGridView panel) + { + this.plugin = plugin; + pluginSettings = plugin.Settings as Settings; + dataGridPanel = panel; + } + + public void AddFileToSearchList(string file) + { + int slashIndex = file.LastIndexOf('\\'); + String name = file.Substring(slashIndex + 1); + fileList.Add(new FileData(name, file)); + } + + private void AddFileToDataGrid(FileData file) + { + // Fast and simple hack to keep the display list clear of dupes + // (caused by searching for files in a project that is contained within one of the Search Directories). + if (!eliminateDupes.ContainsKey(file.Path)) + eliminateDupes.Add(file.Path, null); + else + return; + + string filePath = file.Path; + bool fileFoundInProject = false; + + // Fix Paths so they only show relevant information + if (PluginBase.CurrentProject != null) + { + String projectFolder = Path.GetDirectoryName(PluginBase.CurrentProject.ProjectPath); + if (filePath.Contains(projectFolder)) + { + filePath = filePath.Substring(projectFolder.Length); + fileFoundInProject = true; + } + } + if ((pluginSettings.SearchDirectory != pluginSettings.DefaultEmptyString) && !fileFoundInProject) + { + if (filePath.Contains(pluginSettings.SearchDirectory)) + filePath = filePath.Substring(pluginSettings.SearchDirectory.Length); + } + + dataGridPanel.Rows.Add(file.Name, filePath); + } + + public void DoSearch(string searchText) + { + List tmpList = new List(); + + foreach (FileData file in fileList) + { + string fileName = pluginSettings.IgnoreCase ? 
file.Name.ToLower() : file.Name; + file.Similarity = GetStringSimilarity(searchText, fileName); + + if (pluginSettings.NormalizeSearchResults) + { + if (file.Similarity >= pluginSettings.NormalizedSearchThreshold) + { + file.Similarity = AdjustSimilarityBasedOnSearchPattern(file); + tmpList.Add(file); + } + } + else + { + if (file.Similarity > 0) + { + file.Similarity = AdjustSimilarityBasedOnSearchPattern(file); + tmpList.Add(file); + } + } + } + + tmpList.Sort(delegate(FileData f1, FileData f2) { return f2.Similarity.CompareTo(f1.Similarity); }); + + // Add files to the Data Grid + eliminateDupes = new Dictionary(); + int j = 0, maxFontWidth = 0; + Size s = new Size(); + Font font = dataGridPanel.Columns[0].DefaultCellStyle.Font; + foreach (FileData file in tmpList) + { + s = TextRenderer.MeasureText(file.Name, font); + if (s.Width > maxFontWidth) + maxFontWidth = s.Width; + + AddFileToDataGrid(file); + if (++j > pluginSettings.MaxResultsShown) + break; + } + + // Resize name column to the biggest element on the list + dataGridPanel.Columns[0].Width = maxFontWidth + 10 /* some padding */; + } + + private double AdjustSimilarityBasedOnSearchPattern(FileData file) + { + if (pluginSettings.PrioritizedPattern != "" && pluginSettings.PatternMatchSimilarityBoost > 0) + { + string[] patterns = pluginSettings.PrioritizedPattern.Split(new char[] { ',', ';' }); + foreach (string pat in patterns) + { + if (pluginSettings.ApplyPrioritizedPatternToFilepath ? 
file.Path.Contains(pat) : file.Name.Contains(pat) ) + file.Similarity += pluginSettings.PatternMatchSimilarityBoost; + } + } + + if (PluginBase.CurrentProject != null && pluginSettings.InProjectDirectorySimilarityBoost > 0) + { + String projectFolder = Path.GetDirectoryName(PluginBase.CurrentProject.ProjectPath); + if (file.Path.StartsWith(@"\") || file.Path.StartsWith(projectFolder)) + file.Similarity += pluginSettings.InProjectDirectorySimilarityBoost; + } + return file.Similarity; + } + + private double GetStringSimilarity(string first, string second) + { + double similarity = 0.0; + + switch (pluginSettings.StringMatchingAlgorithm) + { + case StringMatchingAlgorithms.SmithWaterman: + if (pluginSettings.NormalizeSearchResults) + similarity = smithWaterman.GetSimilarity(first, second); + else + similarity = smithWaterman.GetUnnormalisedSimilarity(first, second); + break; + case StringMatchingAlgorithms.SmithWatermanGotoh: + if (pluginSettings.NormalizeSearchResults) + similarity = smithWatermanGotoh.GetSimilarity(first, second); + else + similarity = smithWatermanGotoh.GetUnnormalisedSimilarity(first, second); + break; + case StringMatchingAlgorithms.Jaro: + if (pluginSettings.NormalizeSearchResults) + similarity = jaro.GetSimilarity(first, second); + else + similarity = jaro.GetUnnormalisedSimilarity(first, second); + break; + case StringMatchingAlgorithms.JaroWinkler: + if (pluginSettings.NormalizeSearchResults) + similarity = jaroWinkler.GetSimilarity(first, second); + else + similarity = jaroWinkler.GetUnnormalisedSimilarity(first, second); + break; + } + + return similarity; + } + } + + class FileData + { + public FileData(String name, String path) + { + Name = name; + Path = path; + Similarity = 0.0; + } + public String Name; + public String Path; + public double Similarity; + } +} diff --git a/src/Source/Settings.cs b/src/Source/Settings.cs new file mode 100644 index 0000000..dd317b4 --- /dev/null +++ b/src/Source/Settings.cs @@ -0,0 +1,308 @@ +using 
System; +using System.ComponentModel; +using System.Drawing; +using System.Drawing.Design; +using System.Windows.Forms.Design; +using System.Windows.Forms; +using System.IO; +using System.Collections.Generic; + +namespace FindFilesPlugin +{ + [Serializable] + public class Settings + { + private const StringMatchingAlgorithms DEFAULT_SEARCH_ALGORITHM = StringMatchingAlgorithms.SmithWaterman; + private const String SHORTCUTS_CATEGORY = "Shortcuts"; + private const String FINDFILES_CATEGORY = "Find Files"; + private const String ADVANCED_CATEGORY = "Advanced"; + public const String DEFAULT_SEARCHDIRECTORY = ""; + private const String DEFAULT_PRIORITIZED_PATTERN = ""; + public const String DEFAULT_SEARCHFILTER = "*.*"; + private const Boolean DEFAULT_CTRL_CLICK_ENABLED = true; + private const Boolean DEFAULT_NORMALIZE_SEARCH = false; + private const Boolean DEFAULT_SEARCH_PROJECT = true; + private const Boolean DEFAULT_SEARCH_DIRECTORIES = false; + private const Boolean DEFAULT_IGNORE_CASE = false; + private const Boolean DEFAULT_APPLY_PRIORITIZED_PATTERN_TO_FILEPATH = true; + public const double DEFAULT_NORMALIZED_SEARCH_THRESHOLD = 0.5; + private const double DEFAULT_PATTERN_MATCH_SIMILARITY_BOOST = 0.1; + private const double DEFAULT_IN_PROJECT_DIR_SIMILARITY_BOOST = 0.0; + private const int DEFAULT_MAX_RESULTS_SHOWN = 25; + private const Keys DEFAULT_RESOURCE_SHORTCUT = Keys.Alt | Keys.Shift | Keys.O; + private const Keys DEFAULT_OUTLINE_SHORTCUT = Keys.Control | Keys.Shift | Keys.O; + + private bool searchInProjectOnly; + private Size findFilesFormSize; + private Size outlineFormSize; + private StringMatchingAlgorithms stringMatchingAlgorithm = DEFAULT_SEARCH_ALGORITHM; + private String searchDirectory = DEFAULT_SEARCHDIRECTORY; + private String excludedDirectories = DEFAULT_SEARCHDIRECTORY; + private String searchFilter = DEFAULT_SEARCHFILTER; + private String prioritizedPattern = DEFAULT_PRIORITIZED_PATTERN; + private Keys openResourceShortcut = 
DEFAULT_RESOURCE_SHORTCUT; + private Keys quickOutlineShortcut = DEFAULT_OUTLINE_SHORTCUT; + private Boolean showDirectories = DEFAULT_SEARCH_DIRECTORIES; + private Boolean searchProject = DEFAULT_SEARCH_PROJECT; + private Boolean ignoreCase = DEFAULT_SEARCH_PROJECT; + private Boolean applyPrioritizedPatternToFilepath = DEFAULT_APPLY_PRIORITIZED_PATTERN_TO_FILEPATH; + private Boolean ctrlClickEnabled = DEFAULT_CTRL_CLICK_ENABLED; + private Boolean normalizeSearchResults = DEFAULT_NORMALIZE_SEARCH; + private double patternMatchSimilarityBoost = DEFAULT_PATTERN_MATCH_SIMILARITY_BOOST; + private double inProjectDirectorySimilarityBoost = DEFAULT_IN_PROJECT_DIR_SIMILARITY_BOOST; + private double normalizedSearchThreshold = DEFAULT_NORMALIZED_SEARCH_THRESHOLD; + private int maxResultsShown = DEFAULT_MAX_RESULTS_SHOWN; + private int fileNameWidth = 0; + private int filePathWidth = 0; + + #region Hidden Plugin Settings + + [Browsable(false)] + public Size FindFilesFormSize + { + get { return findFilesFormSize; } + set { findFilesFormSize = value; } + } + [Browsable(false)] + public Size OutlineFormSize + { + get { return outlineFormSize; } + set { outlineFormSize = value; } + } + + [Browsable(false)] + public int FileNameWidth + { + get { return fileNameWidth; } + set { fileNameWidth = value; } + } + [Browsable(false)] + public int FilePathWidth + { + get { return filePathWidth; } + set { filePathWidth = value; } + } + [Browsable(false)] + public bool SearchInProjectOnly + { + get { return searchInProjectOnly; } + set { searchInProjectOnly = value; } + } + + [Browsable(false)] + public String DefaultEmptyString { get { return DEFAULT_SEARCHDIRECTORY; } } + [Browsable(false)] + public String DefaultSearchFilter { get { return DEFAULT_SEARCHFILTER; } } + #endregion + + #region ADVANCED SETTINGS + + [Category(ADVANCED_CATEGORY)] + [DisplayName("Normalized Search Threshold")] + [Description("A threshold that determines when a string is similar enough to get shown in the Find 
Window. Lower numbers result in more, but less accurate results. Must have \"Normalize Search Results\" enabled in oder to have an effect.")] + [DefaultValue(DEFAULT_NORMALIZED_SEARCH_THRESHOLD)] + public Double NormalizedSearchThreshold + { + get + { + if (normalizedSearchThreshold < 0 || normalizedSearchThreshold > 1) + { + MessageBox.Show("Search Threshold needs to be a floating point number between 0 and 1. Threshold Reset to 0.5.", "Invalid Search Threshold", MessageBoxButtons.OK); + normalizedSearchThreshold = DEFAULT_NORMALIZED_SEARCH_THRESHOLD; + } + return normalizedSearchThreshold; + } + set { normalizedSearchThreshold = value; } + } + + [Category(ADVANCED_CATEGORY)] + [DisplayName("Normalize Search Results")] + [Description("Checks accuracy of words based on a scale of 0-1. Setting this to True will allow results to be discarded if their similarity boost is less than the specified \"Normalied Search Threshold\". This results in better performance at the cost of losing potential matches.")] + [DefaultValue(DEFAULT_NORMALIZE_SEARCH)] + public Boolean NormalizeSearchResults + { + get { return normalizeSearchResults; } + set { normalizeSearchResults = value; } + } + + [Category(ADVANCED_CATEGORY)] + [DisplayName("Ignore Case")] + [Description("Causes file names to be lowercased before being compared with the search term.")] + [DefaultValue(DEFAULT_IGNORE_CASE)] + public Boolean IgnoreCase + { + get { return ignoreCase; } + set { ignoreCase = value; } + } + + [Category(ADVANCED_CATEGORY)] + [DisplayName("Apply Prioritized Pattern to File Path")] + [Description("Setting this to False will cause the program to search for the\"Prioritized String Patterns\" on the File Name (including extension) only, instead of the whole File Path. 
Set to False for slightly better performance.")] + [DefaultValue(DEFAULT_APPLY_PRIORITIZED_PATTERN_TO_FILEPATH)] + public Boolean ApplyPrioritizedPatternToFilepath + { + get { return applyPrioritizedPatternToFilepath; } + set { applyPrioritizedPatternToFilepath = value; } + } + + [Category(ADVANCED_CATEGORY)] + [DisplayName("Similarity Boost - Pattern Match")] + [Description("Value that gets added to the similarity of files that contain a prioritized pattern. Increasing this number makes files that contain a prioritized pattern in their name more likely to show up first in the search results.")] + [DefaultValue(DEFAULT_PATTERN_MATCH_SIMILARITY_BOOST)] + public double PatternMatchSimilarityBoost + { + get { return patternMatchSimilarityBoost; } + set { patternMatchSimilarityBoost = value; PluginMain.ClearCachedFiles(); } + } + + [Category(ADVANCED_CATEGORY)] + [DisplayName("Similarity Boost - Within Project")] + [Description("Only valid if \"Search Project\" is set to True - Value that gets added to the similarity of files that are in the current project. Increasing this number makes files in the project more likely to show up first in the search results.")] + [DefaultValue(DEFAULT_IN_PROJECT_DIR_SIMILARITY_BOOST)] + public double InProjectDirectorySimilarityBoost + { + get { return inProjectDirectorySimilarityBoost; } + set { inProjectDirectorySimilarityBoost = value; PluginMain.ClearCachedFiles(); } + } + #endregion + + #region FINDFILES SETTINGS + + [Category(FINDFILES_CATEGORY)] + [DisplayName("Search Directories")] + [Description("If this is set the program will look for files in the specified directories, separated by delimiter (',' ';'). 
ONLY ABSOLUTE PATHS!")] + [DefaultValue(DEFAULT_SEARCHDIRECTORY)] + [Editor(typeof(FolderNameEditor), typeof(UITypeEditor))] + public String SearchDirectory + { + get { return searchDirectory; } + set { searchDirectory = value; PluginMain.ClearCachedFiles(); } + } + + [Category(FINDFILES_CATEGORY)] + [DisplayName("Excluded Directories")] + [Description("A list of excluded directories, separated by delimiter (',' ';'). ONLY ABSOLUTE PATHS!")] + [DefaultValue(DEFAULT_SEARCHDIRECTORY)] + [Editor(typeof(FolderNameEditor), typeof(UITypeEditor))] + public String ExcludedDirectories + { + get { return excludedDirectories; } + set { excludedDirectories = value; PluginMain.ClearCachedFiles(); } + } + + [Category(FINDFILES_CATEGORY)] + [DisplayName("File Type Filter")] + [Description("Search for files with a specific extension. Separate extensions with ',' for multiple file type search. Extensions should be of format *.ext ")] + [DefaultValue(DEFAULT_SEARCHFILTER)] + public String SearchFilter + { + get { return searchFilter; } + set { searchFilter = value; PluginMain.ClearCachedFiles(); } + } + + [Category(FINDFILES_CATEGORY)] + [DisplayName("Search Project")] + [Description("Search for files in the current project in addition to the specified \"Search Directories\" (only valid if a project is opened). 
")] + [DefaultValue(DEFAULT_SEARCH_PROJECT)] + public Boolean SearchProject + { + get { return searchProject; } + set { searchProject = value; PluginMain.ClearCachedFiles(); } + } + + [Category(FINDFILES_CATEGORY)] + [DisplayName("Show Directories")] + [Description("Also show directories in search results (directory name used for matching, will open windows explorer if selected).")] + [DefaultValue(DEFAULT_SEARCH_DIRECTORIES)] + public Boolean ShowDirectories + { + get { return showDirectories; } + set { showDirectories = value; PluginMain.ClearCachedFiles(); } + } + + [Category(FINDFILES_CATEGORY)] + [DisplayName("Prioritized String Patterns")] + [Description("If a file name contains the above text, it will get a priority boost (adjustable). File extensions can be prioritized if the \"Remove File Extension\" setting is disabled. (SEPARATE MULTIPLES WITH ',')")] + [DefaultValue(DEFAULT_PRIORITIZED_PATTERN)] + public String PrioritizedPattern + { + get { return prioritizedPattern; } + set { prioritizedPattern = value; PluginMain.ClearCachedFiles(); } + } + + [Category(FINDFILES_CATEGORY)] + [DisplayName("String Matching Algorithm")] + [Description("Select the algorithm used to find the best match for your search. Try them out to find the one that gets the best results with good performance for your situation.")] + [DefaultValue(DEFAULT_SEARCH_ALGORITHM)] + public StringMatchingAlgorithms StringMatchingAlgorithm + { + get { return stringMatchingAlgorithm; } + set { stringMatchingAlgorithm = value; } + } + #endregion + + #region MISC SETTINGS + // MISC Category + [DisplayName("Enable navigation by Ctrl+Click")] + [Description("Go to declaration by Ctrl+Click on the word")] + [DefaultValue(DEFAULT_CTRL_CLICK_ENABLED)] + public Boolean CtrlClickEnabled + { + get { return ctrlClickEnabled; } + set { ctrlClickEnabled = value; } + } + + // MISC Category + [DisplayName("Max Results Shown")] + [Description("Maximum number of results shown in find windows. 
Valid range is 1 - 100")] + [DefaultValue(DEFAULT_MAX_RESULTS_SHOWN)] + public int MaxResultsShown + { + get + { + if (maxResultsShown <= 1 || maxResultsShown > 100) + { + MessageBox.Show("\"Max Results Show\" valid range is between 1 - 100. Value reset to default [25].", "FindFiles Error", MessageBoxButtons.OK); + maxResultsShown = DEFAULT_MAX_RESULTS_SHOWN; + } + return maxResultsShown; + } + set { maxResultsShown = value; } + } + #endregion + + #region SHORTCUTS SETTINGS + + [Category(SHORTCUTS_CATEGORY)] + [DisplayName("Find Files")] + [Description("Shortcut to open the Find Files dialog.")] + [DefaultValue(DEFAULT_RESOURCE_SHORTCUT)] + [Browsable(false)] // Shortcuts now set using the shortcut manager (FD4 way) + public Keys OpenResourceShortcut + { + get { return openResourceShortcut; } + set { openResourceShortcut = value; } + } + + [Category(SHORTCUTS_CATEGORY)] + [DisplayName("Quick Outline")] + [Description("Shortcut to open QuickOutline dialog")] + [DefaultValue(DEFAULT_OUTLINE_SHORTCUT)] + [Browsable(false)] // Shortcuts now set using the shortcut manager (FD4 way) + public Keys QuickOutlineShortcut + { + get { return quickOutlineShortcut; } + set { quickOutlineShortcut = value; } + } + #endregion + + } + + public enum StringMatchingAlgorithms + { + SmithWatermanGotoh, + SmithWaterman, + JaroWinkler, + Jaro + } +} \ No newline at end of file diff --git a/src/app.config b/src/app.config new file mode 100644 index 0000000..ea93c85 --- /dev/null +++ b/src/app.config @@ -0,0 +1,3 @@ + + + diff --git a/src/gtk-refresh.png b/src/gtk-refresh.png new file mode 100644 index 0000000..d868432 Binary files /dev/null and b/src/gtk-refresh.png differ