From 23d6f697a3a81655fbc190d51a1750108a3fcb46 Mon Sep 17 00:00:00 2001 From: Phil Gilmore Date: Fri, 21 Mar 2025 00:39:32 -0600 Subject: [PATCH] Initial commit. --- .editorconfig | 34 ++ .gitignore | 401 ++++++++++++++++++ license.txt | 21 + readme.md | 176 ++++++++ source/GParse.sln | 28 ++ .../GParse/AnonDelimitedTextInputProvider.cs | 17 + source/GParse/DelimitedTextParser.cs | 36 ++ source/GParse/FixedFieldParser.cs | 15 + source/GParse/GParse.csproj | 28 ++ source/GParse/ITextInputProvider.cs | 5 + source/GParse/ITextParser.cs | 8 + source/GParse/ParseException.cs | 8 + source/GParse/QuoteAwareParser.cs | 52 +++ source/GParse/SplitParser.cs | 8 + source/GParse/StringExtensions.cs | 29 ++ source/GParse/StringTextInputProvider.cs | 6 + .../AnonDelimitedTextInputProviderTests.cs | 19 + .../GParseTests/DelimitedTextParserTests.cs | 26 ++ source/GParseTests/GParseTests.csproj | 31 ++ source/GParseTests/GlobalUsings.cs | 3 + source/GParseTests/QuoteAwareParserTests.cs | 49 +++ source/GParseTests/SplitParserTests.cs | 26 ++ source/GParseTests/UnquoteTests.cs | 25 ++ 23 files changed, 1051 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 license.txt create mode 100644 readme.md create mode 100644 source/GParse.sln create mode 100644 source/GParse/AnonDelimitedTextInputProvider.cs create mode 100644 source/GParse/DelimitedTextParser.cs create mode 100644 source/GParse/FixedFieldParser.cs create mode 100644 source/GParse/GParse.csproj create mode 100644 source/GParse/ITextInputProvider.cs create mode 100644 source/GParse/ITextParser.cs create mode 100644 source/GParse/ParseException.cs create mode 100644 source/GParse/QuoteAwareParser.cs create mode 100644 source/GParse/SplitParser.cs create mode 100644 source/GParse/StringExtensions.cs create mode 100644 source/GParse/StringTextInputProvider.cs create mode 100644 source/GParseTests/AnonDelimitedTextInputProviderTests.cs create mode 100644 
source/GParseTests/DelimitedTextParserTests.cs create mode 100644 source/GParseTests/GParseTests.csproj create mode 100644 source/GParseTests/GlobalUsings.cs create mode 100644 source/GParseTests/QuoteAwareParserTests.cs create mode 100644 source/GParseTests/SplitParserTests.cs create mode 100644 source/GParseTests/UnquoteTests.cs diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..28764cf --- /dev/null +++ b/.editorconfig @@ -0,0 +1,34 @@ +root=true + +[{*, *.*}] +resharper_tabs_are_disallowed_highlighting=none + +[*.cs] +tab_width = 4 +indent_size = 4 +indent_style = tab + +[{*.vb, *.bas}] +tab_width = 4 +indent_size = 4 +indent_style = tab + +[*.xml] +tab_width = 4 +indent_size = 4 +indent_style = tab + +[*.py] +tab_width = 4 +indent_size = 4 +indent_style = tab + +[{*.yml, *.yaml}] +tab_width = 2 +indent_size = 2 +indent_style = space + +[*.pas] +tab_width = 2 +indent_size = 2 +indent_style = space diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..89f385f --- /dev/null +++ b/.gitignore @@ -0,0 +1,401 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool 
+coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files +*.ncb +*.aps + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.iml + +# Python virtual environments +venv/ 
\ No newline at end of file diff --git a/license.txt b/license.txt new file mode 100644 index 0000000..63b4b68 --- /dev/null +++ b/license.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) [year] [fullname] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..b0366ea --- /dev/null +++ b/readme.md @@ -0,0 +1,176 @@ +# GParse ![(Beta)](https://img.shields.io/badge/BETA-yellow?style=plastic) + +A library of useful delimited-text parsers with a common interface. + +## Description + +All parsers in this library implement the ITextParser interface with a single method: + +```csharp +public IEnumerable Parse(...); +``` + +To support deferred execution or more dynamic input, a text provider interface can be used instead of a string for the input. 
+ +```csharp +public interface ITextInputProvider +{ + public string GetText(); +} +``` + +The library contains these parsers: + + +|Class Name| | +|----:|:----| +|SplitParser |A simple wrapper around the .NET string.Split() method. | +|DelimitedTextParser |A custom implementation that looks for delimiters of any size. May be modified in the future to accept multiple delimiters. | +|QuoteAwareParser | A delimited text parser which knows to ignore instances of the delimiter when it is found within quotes. Useful for space-delimited files where the fields are human-readable text and may contain spaces, for example. | + + +## Usage Instructions + +- Create an instance of your chosen parser. +- Then call the Parse() method with your delimited input. +- Execution is deferred. Iterate the collection to retrieve the values. + +See the details for each class below for more information and examples of usage. + +## ITextParser Interface + +The `ITextParser` interface has only one method, `Parse()`. There are overloads to take two types of input, a `string` or an `ITextInputProvider`. + +## ITextInputProvider Interface + +The `ITextInputProvider` interface can be used as a sort of string factory. It can defer the collection of the input string until the moment that it is needed or it can be associated with a function instead of a literal value so that it can be parameterized. + +```csharp +public interface ITextInputProvider +{ + public string GetText(); +} +``` + +## AnonDelimitedTextInputProvider Class + +For convenience, the `AnonDelimitedTextInputProvider` class has been included to provide a universal implementation of `ITextInputProvider`. Its `GetText()` implementation is provided as a function through its constructor. 
+ +_Example:_ + +```csharp +ITextInputProvider provider = new AnonDelimitedTextInputProvider( + static () => Console.ReadLine()); +Console.WriteLine(provider.GetText()); +``` + +## SplitParser + +The `SplitParser` class uses .NET's `string.Split()` method under the hood. This is a wrapper around it to bind it to the ITextParser interface. + +_Example:_ +```csharp +ITextParser parser = new SplitParser("|"); +IEnumerable<string> oneTwoThree = parser.Parse("1|2|3"); +``` + +## DelimitedTextParser + +The `DelimitedTextParser` class is a custom replacement for .NET's `string.Split()` method. This is useful because it may provide more features in the future. + +_Example:_ +```csharp +ITextParser parser = new DelimitedTextParser("|"); +IEnumerable<string> oneTwoThree = parser.Parse("1|2|3"); +``` + +## QuoteAwareParser + +The `QuoteAwareParser` class will ignore delimiters that it finds within quotes. This is useful for inputs which may be space-delimited but where the tokens are in human language and likely contain spaces, for example. The constructor accepts parameters for the openQuote and closeQuote, so the quotes need not be actual quotation characters. They can be any string. + +If the parsed text has an open quote without a corresponding closing quote, a `ParseException` is thrown. + +_Example:_ +```csharp +ITextParser parser = new QuoteAwareParser(" ", "{", "}"); +List<string> containsSpacesText = parser + .Parse("{This contains spaces} {and so does this}") + .ToList(); + +Console.WriteLine(containsSpacesText[0]); +Console.WriteLine(containsSpacesText[1]); + +// Output is: +//{This contains spaces} +//{and so does this} +``` + +Note in the example above that the `{` and `}` characters are not removed from the tokens during parsing. The quotation characters are maintained. It is up to the caller to remove them if that is what's desired. To facilitate this, see the `string.Unquote()` extension method. 
+ +## string.Unquote() Extension Method + +The `string.Unquote()` extension method is provided to work with the remaining quotes which are kept by the QuoteAwareParser during its Parse() operation. It's easy enough to use. Just call it on the token string and pass the open and closing quotation strings. Here is the `QuoteAwareParser` example revised to use it after parsing. + +```csharp +ITextParser parser = new QuoteAwareParser(" ", "{", "}"); +List containsSpacesText = parser + .Parse("{This contains spaces} {and so does this}") + .Select(static s => s.Unquote("{", "}")) + .ToList(); + +Console.WriteLine(containsSpacesText[0]); +Console.WriteLine(containsSpacesText[1]); + +// Output is: +//This contains spaces +//and so does this +``` + +## Roadmap + +### AutoParser + +If it is determined useful and feasible, create a parser factory which is given an input sample for it to determine which parser should be used and its parameters. For example, if it contains spaces and even number of single or double quotes, it must be a quote-aware parser and the delimiter is what appears between the quotes. Data without quotes can determine the delimiter if there is only one non-alphanumeric non-whitespace character in the sample. If true, this is a DelimitedText parser. + +### DelimitedTextParser + +Future features for the `DelimitedTextParser` class include: +- Multiple delimiters +- Case-insensitive delimiters +- Convert from SafeSubstring to use `Span` and read it one character at a time for performance. + +### QuoteAwareParser + +Future features for the `QuoteAwareParser` class include: +- Multiple delimiters +- Case-insensitive delimiters +- Convert from SafeSubstring to use `Span` and read it one character at a time for performance. + +### Unparsers + +Unparsers will reverse the `IEnumerable` into a single concatenated string. This may be moot in light of what Linq can do, but we'll see if it's more readable or more usable. 
+ +Unparsers can: +- Concatenate a list of tokens, separated by a delimiter. +- Conditional delimiters (omit some delimiters based on predicates) +- Conditional tokens (omit some tokens based on predicates) +- Token transforms / projections (surround a token with brackets, etc.) +- Calculated delimiters (e.g., "1:A", "2:B", "3:C") etc. +- Overall prefix (e.g. "MyPrefix 1,2,3") +- Overall suffix (e.g. "1,2,3 MySuffix") +- Align tokens by space-padding the fields. + +## More Examples +The GParse library is fully unit-tested. You can find examples of use in the unit tests. + +## Source Code +You can find the source code online at my Git server. +https://git.pillidar.com/PillidarPublic/GParse + +## Issues + +No known issues. + +## Notes + +Notes go here. diff --git a/source/GParse.sln b/source/GParse.sln new file mode 100644 index 0000000..11436d5 --- /dev/null +++ b/source/GParse.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.12.35506.116 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GParse", "GParse\GParse.csproj", "{1C8E54C8-5B68-4371-811C-F6BD28EFB75F}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GParseTests", "GParseTests\GParseTests.csproj", "{3951BD10-7CF0-45D5-976D-72403659FC93}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {1C8E54C8-5B68-4371-811C-F6BD28EFB75F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1C8E54C8-5B68-4371-811C-F6BD28EFB75F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1C8E54C8-5B68-4371-811C-F6BD28EFB75F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1C8E54C8-5B68-4371-811C-F6BD28EFB75F}.Release|Any CPU.Build.0 = Release|Any CPU + {3951BD10-7CF0-45D5-976D-72403659FC93}.Debug|Any CPU.ActiveCfg = 
/// <summary>
/// Splits text into tokens on every occurrence of a fixed delimiter string.
/// Matching is ordinal (case-sensitive, culture-insensitive) and execution is deferred:
/// nothing is scanned until the returned sequence is enumerated.
/// </summary>
/// <param name="delimiter">
/// The separator to look for; it may be longer than one character.
/// An empty delimiter can never separate anything, so the whole input is returned as a single token.
/// </param>
public class DelimitedTextParser(string delimiter = ",") : ITextParser
{
	/// <summary>Parses the text supplied by <paramref name="textInputProvider"/>.</summary>
	/// <param name="textInputProvider">Source of the text; <c>GetText()</c> is called immediately.</param>
	/// <returns>The tokens found between delimiters, in input order.</returns>
	public IEnumerable<string> Parse(ITextInputProvider textInputProvider) => Parse(textInputProvider.GetText());

	/// <summary>Parses <paramref name="text"/> into the tokens found between delimiters.</summary>
	/// <param name="text">The delimited input.</param>
	/// <returns>
	/// One token per delimited field. Empty fields are preserved, so the sequence always
	/// contains exactly one more token than the number of delimiters matched.
	/// </returns>
	/// <exception cref="ArgumentNullException"><paramref name="text"/> is null (raised on enumeration).</exception>
	public IEnumerable<string> Parse(string text)
	{
		ArgumentNullException.ThrowIfNull(text);

		int size = text.Length;
		int delimiterSize = string.IsNullOrEmpty(delimiter) ? 0 : delimiter.Length;
		int tokenStartIndex = 0;

		// With no delimiter there is nothing to search for; skipping the scan also
		// guarantees that the loop below always advances.
		if (delimiterSize > 0)
		{
			int index = 0;

			// Stop as soon as a full delimiter can no longer fit in the remaining text.
			while (index + delimiterSize <= size)
			{
				// Compare in place rather than allocating a substring for every position.
				if (string.CompareOrdinal(text, index, delimiter, 0, delimiterSize) == 0)
				{
					yield return text[tokenStartIndex..index];

					// Jump past the whole delimiter. Advancing by a single character would let
					// an overlapping match (e.g. "|||" with "||") start before tokenStartIndex
					// and produce a negative token length.
					index += delimiterSize;
					tokenStartIndex = index;
				}
				else
				{
					index++;
				}
			}
		}

		// Whatever follows the last delimiter is the final token; it is empty when the text
		// is empty or ends with a delimiter.
		yield return text[tokenStartIndex..];
	}
}
/// <summary>
/// A delimited-text parser that ignores delimiters appearing between an opening and a closing
/// quote string. Quote strings are kept in the returned tokens. Matching is ordinal and
/// execution is deferred until the returned sequence is enumerated.
/// </summary>
/// <param name="delimiter">The separator between tokens. Must be non-empty and different from both quotes.</param>
/// <param name="openQuote">The string that starts a quoted section. Must be non-empty.</param>
/// <param name="closeQuote">The string that ends a quoted section. Must be non-empty.</param>
public class QuoteAwareParser(string delimiter, string openQuote, string closeQuote) : ITextParser
{
	/// <summary>Parses the text supplied by <paramref name="textInputProvider"/>.</summary>
	/// <param name="textInputProvider">Source of the text; <c>GetText()</c> is called immediately.</param>
	/// <returns>The tokens found between unquoted delimiters, in input order.</returns>
	public IEnumerable<string> Parse(ITextInputProvider textInputProvider) => Parse(textInputProvider.GetText());

	/// <summary>Parses <paramref name="text"/> into tokens, honouring quoted sections.</summary>
	/// <param name="text">The delimited input.</param>
	/// <returns>One token per field; empty fields are preserved and quotes are left in place.</returns>
	/// <exception cref="ArgumentException">
	/// The delimiter or a quote is null or empty, or the delimiter equals one of the quotes (raised on enumeration).
	/// </exception>
	/// <exception cref="ArgumentNullException"><paramref name="text"/> is null (raised on enumeration).</exception>
	/// <exception cref="ParseException">The input ends inside a quoted section.</exception>
	public IEnumerable<string> Parse(string text)
	{
		// An empty pattern matches at every position without consuming input, which would
		// keep the scan below from ever advancing.
		if (string.IsNullOrEmpty(delimiter) || string.IsNullOrEmpty(openQuote) || string.IsNullOrEmpty(closeQuote))
		{
			throw new ArgumentException("Delimiter and quotes cannot be null or empty.");
		}

		if (delimiter == openQuote || delimiter == closeQuote)
		{
			throw new ArgumentException("Delimiter cannot be the same as the quotes.");
		}

		ArgumentNullException.ThrowIfNull(text);

		int size = text.Length;
		int index = 0;
		bool inQuote = false;
		int tokenStartIndex = 0;

		while (index < size)
		{
			if (!inQuote && MatchesAt(text, index, openQuote))
			{
				// Enter quotes; the quote itself stays part of the current token.
				inQuote = true;
				index += openQuote.Length;
			}
			else if (inQuote && MatchesAt(text, index, closeQuote))
			{
				// Leave quotes.
				inQuote = false;
				index += closeQuote.Length;
			}
			else if (!inQuote && MatchesAt(text, index, delimiter))
			{
				yield return text[tokenStartIndex..index];

				index += delimiter.Length;
				tokenStartIndex = index;
			}
			else
			{
				index++;
			}
		}

		if (inQuote)
		{
			throw new ParseException("Unterminated quotation in input string.");
		}

		// The remainder after the last delimiter; empty when the text is empty or ends with a delimiter.
		yield return text[tokenStartIndex..];
	}

	/// <summary>
	/// Reports whether <paramref name="pattern"/> occurs in <paramref name="source"/> starting at
	/// <paramref name="position"/>, using an ordinal comparison and without allocating a substring.
	/// </summary>
	private static bool MatchesAt(string source, int position, string pattern) =>
		string.CompareOrdinal(source, position, pattern, 0, pattern.Length) == 0;
}
/// <summary>
/// Public string helpers that complement the parsers in this library.
/// </summary>
public static class PublicStringExtensions
{
	/// <summary>
	/// Removes one leading <paramref name="openQuote"/> and one trailing <paramref name="closeQuote"/>
	/// from <paramref name="subject"/> when both are present. Comparison is ordinal.
	/// </summary>
	/// <param name="subject">The possibly quoted token.</param>
	/// <param name="openQuote">The opening quote string.</param>
	/// <param name="closeQuote">The closing quote string.</param>
	/// <returns>
	/// The text between the quotes, or <paramref name="subject"/> unchanged when it is not wrapped
	/// in both quotes.
	/// </returns>
	/// <exception cref="ArgumentNullException">Any argument is null.</exception>
	public static string Unquote(this string subject, string openQuote, string closeQuote)
	{
		ArgumentNullException.ThrowIfNull(subject);
		ArgumentNullException.ThrowIfNull(openQuote);
		ArgumentNullException.ThrowIfNull(closeQuote);

		const StringComparison stringComparison = StringComparison.Ordinal;

		// The length check ensures the two quotes do not overlap. Without it a subject such as
		// a lone "\"" passes both StartsWith and EndsWith and the slice below would have its
		// end before its start.
		bool isQuoted =
			subject.Length >= openQuote.Length + closeQuote.Length
			&& subject.StartsWith(openQuote, stringComparison)
			&& subject.EndsWith(closeQuote, stringComparison);

		return isQuoted
			? subject[openQuote.Length..^closeQuote.Length]
			: subject;
	}
}
/// <summary>
/// Tests for <see cref="QuoteAwareParser"/> using "," as the delimiter and "{{" / "}}" as the quotes.
/// </summary>
public class QuoteAwareParserTests
{
	private const string Delimiter = ",";
	private const string OpenQuote = "{{";
	private const string CloseQuote = "}}";

	[Fact]
	public void CanConstruct()
	{
		_ = new QuoteAwareParser("|", "{", "}");
	}

	// Each case pairs an input with the exact token sequence expected from it.
	[Theory]
	[InlineData("", new string[] { "" })]
	[InlineData("1", new string[] { "1" })]
	[InlineData("1,2", new string[] { "1", "2" })]
	[InlineData(",", new string[] { "", "" })]
	[InlineData(",,", new string[] { "", "", "" })]
	[InlineData("1,", new string[] { "1", "" })]
	[InlineData(",1", new string[] { "", "1" })]
	[InlineData("{{a,b,c}}", new string[] { "{{a,b,c}}" })]
	[InlineData("{{a,b,c}},{{1,2,3}}", new string[] { "{{a,b,c}}", "{{1,2,3}}" })]
	[InlineData("{{a,b,c}},", new string[] { "{{a,b,c}}", "" })]
	[InlineData(",{{a,b,c}},", new string[] { "", "{{a,b,c}}", "" })]
	[InlineData(",{{a,b,c}}", new string[] { "", "{{a,b,c}}" })]
	[InlineData("a,b,c", new string[] { "a", "b", "c" })]
	// Quotes that begin or end in the middle of a token still protect the delimiter.
	[InlineData("a{{,}}b", new string[] { "a{{,}}b" })]
	[InlineData("{{a,b}}x,y", new string[] { "{{a,b}}x", "y" })]
	[InlineData("{{}}", new string[] { "{{}}" })]
	public void VarietyTests(string inputText, IEnumerable<string> expected)
	{
		var parser = new QuoteAwareParser(Delimiter, OpenQuote, CloseQuote);
		IEnumerable<string> actual = parser.Parse(inputText);
		actual.ShouldBe(expected);
	}

	[Fact]
	public void FailsOnMismatchedQuotes()
	{
		var parser = new QuoteAwareParser(Delimiter, OpenQuote, CloseQuote);
		const string inputText = "{{this is an unterminated quote";

		// Parsing is deferred, so the sequence must be enumerated for the failure to surface.
		Should.Throw(
			() => parser.Parse(inputText).ToList(),
			typeof(ParseException));
	}

	[Fact]
	public void FailsWhenDelimiterMatchesQuote()
	{
		var parser = new QuoteAwareParser(OpenQuote, OpenQuote, CloseQuote);

		// The argument check runs inside the deferred iterator, so enumerate to trigger it.
		Should.Throw(
			() => parser.Parse("a,b").ToList(),
			typeof(ArgumentException));
	}
}
/// <summary>
/// Tests for the <c>Unquote</c> string extension using "{{" and "}}" as the quote pair.
/// </summary>
public class UnquoteTests
{
	private const string Opening = "{{";
	private const string Closing = "}}";

	// Only inputs wrapped in both quotes are stripped; everything else must come back unchanged.
	[Theory]
	[InlineData("{{}}", "")]
	[InlineData("{{a}}", "a")]
	[InlineData("{{abc}}", "abc")]
	[InlineData("{{", "{{")]
	[InlineData("{{a", "{{a")]
	[InlineData("{{abc", "{{abc")]
	[InlineData("}}", "}}")]
	[InlineData("a}}", "a}}")]
	[InlineData("abc}}", "abc}}")]
	[InlineData("}}{{", "}}{{")]
	[InlineData("}}a{{", "}}a{{")]
	[InlineData("}}abc{{", "}}abc{{")]
	public void VarietyTests(string input, string expected) =>
		input.Unquote(Opening, Closing).ShouldBe(expected);
}