using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace AusPostCode
{
    /// <summary>
    /// Parses a customer barcode given as a string of bar digits (<c>0</c>-<c>3</c>).
    /// The constructor locates the start/end bars, decodes the Format Control Code and
    /// Sorting Code fields, and prints what it finds to the console.
    /// </summary>
    public class Barcode
    {
        /// <summary>Bar pair expected at the very start of the code.</summary>
        private const string StartBars = "13";

        /// <summary>Bar pair expected at the very end of the code.</summary>
        private const string EndBars = "13";

        // Lookup tables for the letters of encoding table C. Kept as char arrays so they can be
        // indexed by whatever integral type BaseConversion.FromBase returns.
        private static readonly char[] Lower = "abcdefghijklmnopqrstuvwxyz".ToCharArray();
        private static readonly char[] Upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".ToCharArray();

        /// <summary>Matches a non-empty string made up solely of the bar digits 0-3.</summary>
        private static readonly Regex QuaternaryPattern = new Regex("^[0123]+$");

        /// <summary>Human-readable names for the known Format Control Code values.</summary>
        private readonly Dictionary<int, string> _formatTable = new Dictionary<int, string>
        {
            [0] = "Null Customer Barcode",
            [11] = "Standard Customer Barcode",
            [52] = "Customer Business Reply Paid",
            [59] = "Customer Barcode 2",
            [62] = "Customer Barcode 3",
            [67] = "Customer Business Reply Paid",
            [72] = "International Business Reply Paid",
            [77] = "International Business Reply Paid",
        };

        /// <summary>
        /// Layout of each field: encoding, length in bars, and offset in bars.
        /// A negative offset is counted back from the end of the code.
        /// </summary>
        private readonly Dictionary<string, BarcodeFragment> _fragments = new Dictionary<string, BarcodeFragment>
        {
            ["FormatControlCode"] = new BarcodeFragment(EncodingFormat.N, 4, 2),
            ["SortingCode"] = new BarcodeFragment(EncodingFormat.N, 16, 6),
            ["CustomerInformation2"] = new BarcodeFragment(EncodingFormat.C, 16, 22),
            ["CustomerInformation3"] = new BarcodeFragment(EncodingFormat.C, 30, 22),
            // The 12 error-correction bars sit directly in front of the 2 end bars,
            // so the field begins 12 + 2 = 14 bars from the end of the code.
            ["ErrorCorrection"] = new BarcodeFragment(EncodingFormat.BarToDecimal, 12, -14),
        };

        private readonly int _formatCode;

        /// <summary>
        /// Parses <paramref name="code"/>. Missing start or end bars are added (and recorded as
        /// warnings) before the fields are read.
        /// </summary>
        /// <param name="code">The barcode as a string of bar digits 0-3.</param>
        /// <exception cref="ArgumentNullException"><paramref name="code"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The code is too short to hold a field.</exception>
        /// <exception cref="ArgumentException">A field contains bars that cannot be decoded.</exception>
        public Barcode(string code)
        {
            if (code == null)
            {
                throw new ArgumentNullException(nameof(code));
            }

            Code = code;

            // - check for start and end bars
            if (!Code.StartsWith(StartBars, StringComparison.Ordinal))
            {
                Warnings.Add("Couldn't find Start Bars");
                Code = $"{StartBars}{Code}";
            }

            // The end bars are the LAST two bars of the code.
            if (!Code.EndsWith(EndBars, StringComparison.Ordinal))
            {
                Warnings.Add("Couldn't find End Bars");
                Code = $"{Code}{EndBars}";
            }

            // process Format Control Code
            _formatCode = int.Parse(GetFragment(_fragments["FormatControlCode"]));
            Console.WriteLine(Format);

            // process Sorting Code Field
            SortingCode = int.Parse(GetFragment(_fragments["SortingCode"]));
            Console.WriteLine($"Sorting code: {SortingCode}");

            // process Customer Information, if necessary
            // TODO: actually do this, and get a code to test it on.
            //       also, ensure that customer codes are the right length
            if (_formatCode == 59)
            {
                Console.WriteLine("This code has a Customer Information 2 field, but I don't know how to process that yet 0uo");
            }
            else if (_formatCode == 62)
            {
                Console.WriteLine($"Customer info: [{GetFragment(_fragments["CustomerInformation3"])}]");
            }

            // process Reed-Solomon Error Correction Bars
            // TODO: actually validate the barcode
            Console.WriteLine(BaseConversion.FromBase(GetFragment(_fragments["ErrorCorrection"]), 64));
        }

        /// <summary>The decoded Sorting Code field.</summary>
        public int SortingCode { get; set; }

        /// <summary>Customer information value. Not populated by the constructor at present.</summary>
        public int CustomerInformation { get; set; }

        /// <summary>
        /// Name of the barcode format for the decoded Format Control Code, or
        /// <c>Unknown (n)</c> when the code is not in the table.
        /// </summary>
        public string Format =>
            _formatTable.TryGetValue(_formatCode, out var formatName) ? formatName : $"Unknown ({_formatCode})";

        /// <summary>Problems that were repaired while reading the code.</summary>
        private List<string> Warnings { get; } = new List<string>();

        /// <summary>The bar string, including start and end bars.</summary>
        private string Code { get; }

        /// <summary>
        /// Extracts the bars of one field from <see cref="Code"/> and, unless
        /// <paramref name="decode"/> is false, decodes them with the field's encoding.
        /// </summary>
        /// <param name="fragment">Position, length and encoding of the field.</param>
        /// <param name="decode">When false the raw bar digits are returned unchanged.</param>
        /// <returns>The decoded field, or its raw bars.</returns>
        /// <exception cref="ArgumentOutOfRangeException">The field does not fit inside the code.</exception>
        /// <exception cref="ArgumentException">The bars cannot be decoded.</exception>
        private string GetFragment(BarcodeFragment fragment, bool decode = true)
        {
            // A negative offset counts back from the end of the code.
            var start = fragment.Offset < 0 ? Code.Length + fragment.Offset : fragment.Offset;

            if (start < 0 || start + fragment.Length > Code.Length)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(fragment),
                    $"The barcode has {Code.Length} bars, which is too short to hold {fragment.Length} bars starting at bar {start}.");
            }

            var data = Code.Substring(start, fragment.Length);

            return decode ? Decode(data, fragment.TypicalFormat) : data;
        }

        /// <summary>
        /// Decodes a run of bar digits. Format N and C consume the input in chunks of 2 and 3 bars
        /// and append one character per chunk; BarToDecimal consumes chunks of 3 bars and
        /// prepends the base-64 form of each chunk's value to the result.
        /// </summary>
        /// <param name="input">Bar digits 0-3; the length must be a whole number of chunks.</param>
        /// <param name="format">Encoding used by <paramref name="input"/>.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The length is not a multiple of the chunk size, the input holds characters other
        /// than 0-3, or a chunk is not valid for the format.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="format"/> is not a known format.</exception>
        private static string Decode(string input, EncodingFormat format)
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }

            var chunkLength = format == EncodingFormat.N ? 2 : 3;

            if (input.Length % chunkLength != 0)
            {
                throw new ArgumentException($"Input length must be a multiple of {chunkLength}.", nameof(input));
            }

            if (!QuaternaryPattern.IsMatch(input))
            {
                throw new ArgumentException("Input must be a quaternary number.", nameof(input));
            }

            var sb = new StringBuilder();

            for (var i = 0; i < input.Length; i += chunkLength)
            {
                var chunk = input.Substring(i, chunkLength);

                switch (format)
                {
                    case EncodingFormat.N:
                        AppendTableN(sb, chunk, format);
                        break;

                    case EncodingFormat.C:
                        AppendTableC(sb, chunk);
                        break;

                    case EncodingFormat.BarToDecimal:
                        var digitInt = BaseConversion.FromBase(chunk, 4);
                        // Each new symbol goes in FRONT of the ones decoded so far.
                        sb.Insert(0, BaseConversion.ToBase(digitInt, 64));
                        break;

                    default:
                        throw new ArgumentOutOfRangeException(nameof(format), format, null);
                }
            }

            return sb.ToString();
        }

        /// <summary>Appends the digit encoded by one 2-bar format N chunk.</summary>
        private static void AppendTableN(StringBuilder sb, string chunk, EncodingFormat format)
        {
            // format N supports the digits 0 through 9, and nothing else.
            // digits 0 through 8 are stored as their ternary representations, while 9 is stored as "30".
            if (chunk == "30")
            {
                sb.Append(9);
            }
            else if (chunk.Contains("3"))
            {
                // not a ternary number
                throw new ArgumentException($"{chunk} is not a valid identifier for format {format}.");
            }
            else
            {
                sb.Append(BaseConversion.FromBase(chunk, 3));
            }
        }

        /// <summary>Appends the character encoded by one 3-bar table C chunk.</summary>
        private static void AppendTableC(StringBuilder sb, string chunk)
        {
            // firstly, check for 222, which is zero.
            if (chunk == "222")
            {
                sb.Append("0");
            }
            // any other valid ternary number (only digits 0,1,2) is a capital letter.
            else if (!chunk.Contains("3"))
            {
                sb.Append(Upper[BaseConversion.FromBase(chunk, 3)]);
            }
            // a leading 3 followed by a valid ternary number is a digit from 1 through 9.
            // the sequence is 300, 301, 302, 310, 311...
            // because it starts at 0 (after stripping the leading 3), we add 1.
            else if (chunk[0] == '3' && !chunk.Substring(1).Contains("3"))
            {
                sb.Append(BaseConversion.FromBase(chunk.Substring(1), 3) + 1);
            }
            // 003 is space.
            else if (chunk == "003")
            {
                sb.Append(" ");
            }
            // 013 is hash.
            else if (chunk == "013")
            {
                sb.Append("#");
            }
            // anything that doesn't fall into any of the above is a lowercase letter.
            else
            {
                // starting from 023, the lowercase letters follow a cyclical pattern.
                // each "cycle" lasts for seven letters. take the letters "bcdefgh" as an example.
                // b is 030 (12 in decimal). c follows at 031, d at 032, and e at 033: the first half of the cycle.
                // during the second half of the cycle, letters are spaced *four* numbers apart
                // instead of one number apart like before.
                // so e (033q == 15d) is *not* followed by f at 100q (16d), but at 103q (19d).
                // g is found at 113q (23d), and then h is found at 123q (27d).
                // i follows at 130q (28d), and the cycle repeats every 16 values.
                // the letters start at 11d, so we subtract 11 to start from zero instead.
                var x = BaseConversion.FromBase(chunk, 4) - 11;

                // x / 16 counts completed cycles; each completed cycle accounts for 7 letters.
                // (x + 10) % 16 walks through '10, 11, 12, 13, 14, 2, 6' within one cycle.
                // max(.., 5) turns the 2 into a 5, giving '10, 11, 12, 13, 14, 5, 6', and keeping only
                // the units digit (% 10) leaves the step within the cycle: '0, 1, 2, 3, 4, 5, 6'.
                // together that maps the letter codes onto 0..25, e.g. "032" is the 4th letter (d).
                sb.Append(Lower[x / 16 * 7 + Math.Max((x + 10) % 16, 5) % 10]);
            }
        }

        /// <summary>Describes where a field sits in the barcode and how it is encoded.</summary>
        private readonly struct BarcodeFragment
        {
            /// <summary>Encoding used to decode the field.</summary>
            public EncodingFormat TypicalFormat { get; }

            /// <summary>Length of the field in bars.</summary>
            public int Length { get; }

            /// <summary>Offset of the field in bars; negative values count back from the end.</summary>
            public int Offset { get; }

            public BarcodeFragment(EncodingFormat typicalFormat, int length, int offset)
            {
                TypicalFormat = typicalFormat;
                Length = length;
                Offset = offset;
            }
        }

        /// <summary>Encodings a barcode field can use.</summary>
        private enum EncodingFormat
        {
            /// <summary>Digits only, two bars per digit.</summary>
            N,

            /// <summary>Letters, digits, space and hash, three bars per character.</summary>
            C,

            /// <summary>Three bars per value, each converted to a base-64 symbol.</summary>
            BarToDecimal,
        }
    }
}