// AusPostCode/Barcode.cs
//
// 260 lines
// 9.8 KiB
// C#

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace AusPostCode {
public class Barcode {
// Human-readable names for the format control codes this class recognises, keyed by the
// numeric code. The Format property falls back to "Unknown (n)" for any code missing here.
private readonly Dictionary<int, string> _formatTable = new Dictionary<int, string> {
    { 0, "Null Customer Barcode" },
    { 11, "Standard Customer Barcode" },
    { 52, "Customer Business Reply Paid" },
    { 59, "Customer Barcode 2" },
    { 62, "Customer Barcode 3" },
    { 67, "Customer Business Reply Paid" },
    { 72, "International Business Reply Paid" },
    { 77, "International Business Reply Paid" },
};
// Layout of every barcode field: encoding, length in bars, and offset into Code.
// A non-negative offset counts from the start of Code (the two start bars occupy 0-1);
// a negative offset counts back from the end of Code.
private readonly Dictionary<string, BarcodeFragment> _fragments = new Dictionary<string, BarcodeFragment> {
    ["FormatControlCode"] = new BarcodeFragment(EncodingFormat.N, 4, 2),
    ["SortingCode"] = new BarcodeFragment(EncodingFormat.N, 16, 6),
    ["CustomerInformation2"] = new BarcodeFragment(EncodingFormat.C, 16, 22),
    ["CustomerInformation3"] = new BarcodeFragment(EncodingFormat.C, 30, 22),
    // the 12 error correction bars sit immediately before the 2 end bars, so they begin 14 bars from the end.
    // (this used to be -16, which only worked because the constructor always appended a second pair of end bars.)
    ["ErrorCorrection"] = new BarcodeFragment(EncodingFormat.BarToDecimal, 12, -14),
};

// TODO: make these consts
// alphabets used as lookup tables when decoding format C characters.
private readonly char[] _lower = "abcdefghijklmnopqrstuvwxyz".ToCharArray();
private readonly char[] _upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".ToCharArray();

// numeric format control code decoded from the barcode; used as the key into the format table.
private readonly int _formatCode;

/// <summary>
/// Parses a barcode given as a string of bar digits (0-3). Missing start or end bars ("13") are
/// added and recorded as warnings, then the format control code, the sorting code and (for format 62)
/// the customer information are decoded and written to the console.
/// </summary>
/// <param name="code">The bar digits of the barcode, ideally including the "13" start and end bars.</param>
/// <exception cref="ArgumentNullException"><paramref name="code"/> is null.</exception>
/// <exception cref="ArgumentException">
/// A field contains something other than bar digits, or cannot be decoded (thrown by GetFragment;
/// this includes ArgumentOutOfRangeException when the code is too short to contain a field).
/// </exception>
public Barcode(string code) {
    if (code == null) {
        throw new ArgumentNullException(nameof(code));
    }

    Code = code;

    // - check for start and end bars
    if (!Code.StartsWith("13", StringComparison.Ordinal)) {
        Warnings.Add("Couldn't find Start Bars");
        Code = $"13{Code}";
    }

    // the end bars are the *last two* characters. the previous check looked one character too early,
    // so it never matched a well-formed barcode and always appended a duplicate pair of end bars.
    if (!Code.EndsWith("13", StringComparison.Ordinal)) {
        Warnings.Add("Couldn't find End Bars");
        Code = $"{Code}13";
    }

    // process Format Control Code
    _formatCode = int.Parse(GetFragment(_fragments["FormatControlCode"]));
    Console.WriteLine(Format);

    // process Sorting Code Field
    SortingCode = int.Parse(GetFragment(_fragments["SortingCode"]));
    Console.WriteLine($"Sorting code: {SortingCode}");

    // process Customer Information, if necessary
    // TODO: actually do this, and get a code to test it on. also, ensure that customer codes are the right length
    if (_formatCode == 59) {
        Console.WriteLine("This code has a Customer Information 2 field, but I don't know how to process that yet 0uo");
    }
    else if (_formatCode == 62) {
        Console.WriteLine($"Customer info: [{GetFragment(_fragments["CustomerInformation3"])}]");
    }

    // process Reed-Solomon Error Correction Bars
    // TODO: actually validate the barcode
    Console.WriteLine(BaseConversion.FromBase(GetFragment(_fragments["ErrorCorrection"]), 64));
}
/// <summary>The sorting code decoded from the barcode; assigned by the constructor.</summary>
public int SortingCode { get; set; }

/// <summary>Placeholder for decoded customer information; nothing in this class assigns it yet.</summary>
public int CustomerInformation { get; set; }

/// <summary>
/// Name of the barcode format for the decoded format control code,
/// or "Unknown (code)" when the code is not in the format table.
/// </summary>
public string Format =>
    // single lookup instead of ContainsKey followed by the indexer.
    _formatTable.TryGetValue(_formatCode, out var formatName) ? formatName : $"Unknown ({_formatCode})";

// problems noticed while parsing that did not stop the parse (e.g. missing start/end bars).
private List<string> Warnings { get; } = new List<string>();

// the bar digits being parsed, including start and end bars once the constructor has normalised them.
private string Code { get; }
// matches text made up solely of the bar digits 0-3. built once rather than on every GetFragment call.
private static readonly Regex QuaternaryPattern = new Regex("^[0123]+$");

/// <summary>
/// Cuts the bars described by <paramref name="fragment"/> out of Code and, unless
/// <paramref name="decode"/> is false, decodes them according to the fragment's encoding format.
/// </summary>
/// <param name="fragment">
/// Field to extract. A negative offset is measured back from the end of Code.
/// </param>
/// <param name="decode">When false, the raw bar digits are returned without any validation.</param>
/// <returns>The decoded text of the field, or the raw bar digits when <paramref name="decode"/> is false.</returns>
/// <exception cref="ArgumentException">
/// The field contains characters other than 0-3, its length is not a whole number of chunks,
/// or a chunk is not valid for format N.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The field lies outside Code, or the fragment uses an unknown encoding format.
/// </exception>
private string GetFragment(BarcodeFragment fragment, bool decode = true) {
    var start = fragment.Offset < 0 ? Code.Length + fragment.Offset : fragment.Offset;
    var data = Code.Substring(start, fragment.Length);
    if (!decode) {
        return data;
    }

    // format N uses two bars per character; the other formats use three.
    var chunkLength = fragment.TypicalFormat == EncodingFormat.N ? 2 : 3;
    if (!QuaternaryPattern.IsMatch(data)) {
        throw new ArgumentException("Input must be a quaternary number.", nameof(fragment));
    }
    // without this, a ragged final chunk would surface as an opaque Substring failure inside the loop.
    if (data.Length % chunkLength != 0) {
        throw new ArgumentException($"Input length must be a multiple of {chunkLength}.", nameof(fragment));
    }

    var sb = new StringBuilder();
    for (var i = 0; i < data.Length; i += chunkLength) {
        var chunk = data.Substring(i, chunkLength);
        switch (fragment.TypicalFormat) {
            case EncodingFormat.N:
                // format N supports the digits 0 through 9, and nothing else.
                // digits 0 through 8 are stored as their ternary representations, while 9 is stored as "30".
                if (chunk == "30") {
                    sb.Append(9);
                }
                else if (chunk.Contains("3")) {
                    // not a ternary number
                    throw new ArgumentException($"{chunk} is not a valid identifier for format {fragment.TypicalFormat}.");
                }
                else {
                    sb.Append(BaseConversion.FromBase(chunk, 3));
                }
                break;
            case EncodingFormat.C:
                // the order of these checks matters: each branch relies on the earlier ones having
                // already claimed their chunks.
                // firstly, check for 222, which is zero.
                if (chunk == "222") {
                    sb.Append("0");
                }
                // if it's any other valid ternary number (only digits 0,1,2), it's a capital letter.
                else if (!chunk.Contains("3")) {
                    sb.Append(_upper[BaseConversion.FromBase(chunk, 3)]);
                }
                // if it starts with a 3, but is otherwise a valid ternary number, it's a digit from 1 through 9.
                // the sequence is 300, 301, 302, 310, 311...
                // because it starts at 0 (after stripping the leading 3), we add 1.
                else if (chunk[0] == '3' && chunk.IndexOf('3', 1) < 0) {
                    sb.Append(BaseConversion.FromBase(chunk.Substring(1), 3) + 1);
                }
                // 003 is space.
                else if (chunk == "003") {
                    sb.Append(" ");
                }
                // 013 is hash.
                else if (chunk == "013") {
                    sb.Append("#");
                }
                // and finally, anything that doesn't fall into any of the above is a lowercase letter.
                else {
                    // starting from 023 (a), the lowercase letters follow a cyclical pattern of seven letters.
                    // take "bcdefgh": b is 030q (12d), c is 031q, d is 032q and e is 033q (15d) - one apart.
                    // for the rest of the cycle the letters are spaced *four* apart instead:
                    // f is 103q (19d), g is 113q (23d) and h is 123q (27d). i follows at 130q (28d),
                    // and the pattern repeats every 16.
                    // the letters start at 11d, so we subtract 11 to start from zero instead.
                    var x = BaseConversion.FromBase(chunk, 4) - 11;
                    // x / 16 counts completed cycles, each worth seven letters.
                    // (x + 10) % 16 yields the repeating sequence 10, 11, 12, 13, 14, 2, 6 for the seven
                    // letters of a cycle; max(.., 5) turns the 2 into a 5, and keeping only the units digit
                    // (% 10) leaves the step within the cycle: 0, 1, 2, 3, 4, 5, 6.
                    // together that is a plain alphabet index from 0 to 25.
                    sb.Append(_lower[x / 16 * 7 + Math.Max((x + 10) % 16, 5) % 10]);
                }
                break;
            case EncodingFormat.BarToDecimal:
                // each three-bar chunk is read as a base-4 number and re-expressed through
                // BaseConversion.ToBase(.., 64). inserting at the front means the last chunk comes first
                // in the result.
                var digitInt = BaseConversion.FromBase(chunk, 4);
                sb.Insert(0, BaseConversion.ToBase(digitInt, 64));
                break;
            default:
                throw new ArgumentOutOfRangeException(nameof(fragment.TypicalFormat), fragment.TypicalFormat, null);
        }
    }
    return sb.ToString();
}
/// <summary>
/// Decodes a string of bar digits (0-3) using the given encoding format.
/// Only format N is implemented here; formats C and BarToDecimal are accepted but contribute
/// nothing to the result, so they currently yield an empty string.
/// </summary>
/// <param name="input">Bar digits to decode; its length must be a whole number of chunks.</param>
/// <param name="format">Encoding format. N uses two bars per character, the others three.</param>
/// <returns>The decoded text.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="input"/> has the wrong length, contains characters other than 0-3,
/// or contains a chunk that is not valid for format N.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="format"/> is not a known format.</exception>
private static string Decode(string input, EncodingFormat format) {
    var chunkLength = format == EncodingFormat.N ? 2 : 3;
    // the check has to use the same chunk length as the message and the loop below; the hard-coded 2
    // that used to be here let ragged three-bar input through (and rejected valid odd-length input).
    if (input.Length % chunkLength != 0) {
        throw new ArgumentException($"Input length must be a multiple of {chunkLength}.", nameof(input));
    }
    var rx = new Regex("^[0123]+$");
    if (!rx.IsMatch(input)) {
        throw new ArgumentException("Input must be a quaternary number.", nameof(input));
    }
    var sb = new StringBuilder();
    for (var i = 0; i < input.Length; i += chunkLength) {
        var chunk = input.Substring(i, chunkLength);
        switch (format) {
            case EncodingFormat.N:
                // format N supports the digits 0 through 9, and nothing else.
                // digits 0 through 8 are stored as their ternary representations, while 9 is stored as "30".
                if (chunk == "30") {
                    sb.Append(9);
                }
                else if (chunk.Contains("3")) {
                    // not a ternary number
                    throw new ArgumentException($"{chunk} is not a valid identifier for format {format}.");
                }
                else {
                    sb.Append(BaseConversion.FromBase(chunk, 3));
                }
                break;
            case EncodingFormat.C:
                // not implemented in this method: the chunk is skipped.
                break;
            case EncodingFormat.BarToDecimal:
                // not implemented in this method: the chunk is skipped.
                break;
            default:
                throw new ArgumentOutOfRangeException(nameof(format), format, null);
        }
    }
    return sb.ToString();
}
/// <summary>
/// Immutable description of one barcode field: how it is encoded, how many bars it spans,
/// and where it starts.
/// </summary>
private readonly struct BarcodeFragment {
    /// <summary>Creates a field description from its encoding, length and offset.</summary>
    /// <param name="typicalFormat">Encoding used to decode the field.</param>
    /// <param name="length">Number of bars in the field.</param>
    /// <param name="offset">Start position of the field; negative values count back from the end.</param>
    public BarcodeFragment(EncodingFormat typicalFormat, int length, int offset) {
        Offset = offset;
        Length = length;
        TypicalFormat = typicalFormat;
    }

    /// <summary>Encoding used to decode the field.</summary>
    public EncodingFormat TypicalFormat { get; }

    /// <summary>Number of bars in the field.</summary>
    public int Length { get; }

    /// <summary>Start position of the field; negative values count back from the end.</summary>
    public int Offset { get; }
}
/// <summary>Encodings a barcode field can use.</summary>
private enum EncodingFormat {
    // two bars per character; digits 0-9 only.
    N = 0,

    // three bars per character; upper and lower case letters, digits, space and hash.
    C = 1,

    // three bars per chunk, each read as a base-4 number and re-expressed in base 64.
    BarToDecimal = 2,
}
}
}