diff --git a/Documentation/Reuters WNE Packet Format.md b/Documentation/Reuters WNE Packet Format.md index 21691e5..d311ead 100644 --- a/Documentation/Reuters WNE Packet Format.md +++ b/Documentation/Reuters WNE Packet Format.md @@ -9,23 +9,172 @@ Common bytes: - Bytes 2-3: Packet length - Bytes 4-7: Session ID. This changes for every news story. -## A/V File Payloads: -- Byte 1: always 0x01 -- Bytes 8-11: Block number, always incremented by 1 each packet. -- Bytes 12-15: always 0x00000000 -- Bytes 16-end: payload +This format looks suspiciously like it's TLV-based, but I couldn't figure out how exactly the T's and L's are laid out. (Perhaps they use 2 bytes?) +All multi-byte integers are little-endian. -## A/V File announcement: -- Byte 1: always 0x03 -- Bytes 8-11: ??? -- Bytes 12-13: Packet length, minus header (16) minus 8. +## About byte 1 +Byte 1 determines what kind of packet we're dealing with. -## Metadata File Payloads: --Bytes 16-end: payload +| Byte 1 | Packet Type | +|--------|---------------------------------------------------------------------------------| +| 0x01 | A/V Payload, Metadata Payload, Metadata announcement, or Metadata closer packet | +| 0x03 | A/V announcement | +| 0xff | A/V closer | -## Metadata file announcement: -- Bytes 12-13: ??? -- Bytes 14-15: (uint16) Packet length minus 15 -- Bytes 16-17: always seems to be 0x0003 -- Bytes 18-19: (uint16) Always seems to be the value of bytes 14-15 minus one. + + + +## Metadata Announcement packet structure (C) +| Byte Index | Description | +|------------|--------------------------------------------------------------------------------------------------------------------------------------------------------| +| 0 | always 0x00 | +| 1 | always 0x01 | +| 2-3 | Packet length | +| 4-7 | Session ID | +| 8 | ECC group ID (shared with B/C) | +| 9 | ? | +| 10 | always 0x00 ? 
| +| 11 | ECC block counter, incremented by one each packet | +| 12 | ECC flags, if this is 0, the payload is raw data, if this is 0x07, the payload is ECC data and the next packet will have ECC group incremented by one. | +| 13 | usually 0x00, sometimes 0x20, purpose not known | +| 14-15 | If byte 12 is 0, then Packet length minus 15, If byte 12 is 0x07, then ? | +| 16-end | If ECC flags is 0x07, then this is an ECC payload, if ECC flags is 0x00, continue reading. | + +| Byte Index if ECC flags is 0 | Description | +|------------------------------|----------------------------------------------------------| +| 16 | Always 0x00 | +| 17 | Always 0x03 | +| 18-19 | Packet length minus 16 | +| 20-23 | File ID. This changes for every metadata file. | +| 24-28 | Block ID. Incremented by one for every part of the file. | +| 29-32 | Always 0x00000000 | +| 33-end | Payload | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +## Metadata Payload packet structure (B) +| Byte Index | Description | +|------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 0 | always 0x00 | +| 1 | always 0x01 | +| 2-3 | Packet length | +| 4-7 | Session ID | +| 8 | ECC group ID (shared with B/C) | +| 9 | ? | +| 10 | always 0x00 ? | +| 11 | ECC block counter, incremented by one each packet | +| 12 | ECC flags, if this is 0, the payload is raw data, if this is 0x07, the payload is ECC data and the next packet will have ECC group incremented by one, and the block counter set to 0. | +| 13 | usually 0x00, sometimes 0x20, purpose not known | +| 14-15 | If byte 12 is 0, then Packet length minus 15, If byte 12 is 0x07, then ? | +| 16-end | If ECC flags is 0x07, then this is an ECC payload, if ECC flags is 0x00, continue reading. 
| + +| Byte Index if ECC flags is 0 | Description | +|------------------------------|----------------------------------------------------------| +| 16 | Always 0x00 | +| 17 | Always 0x01 | +| 18-19 | Packet length minus 16 | +| 20-23 | File ID. This changes for every metadata file. | +| 24-28 | Block ID. Incremented by one for every part of the file. | +| 29-32 | Always 0x00000000 | +| 33-end | Payload | + + + + + + + + + + + + + + + + + + + + + + + + + + + +## A/V Announcement packet structure (D) +| Byte Index | Description | +|------------|--------------------| +| 0 | always 0x00 | +| 1 | always 0x03 | +| 2-3 | Packet length | +| 4-7 | Session ID | +| 8-11 | Packet length - 8 | +| 12-15 | Packet length - 12 | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +## AV Payload packet structure (A) + +| Byte Index | Description | +|------------|----------------| +| 0 | always 0x00 | +| 1 | always 0x01 | +| 2-3 | Packet length | +| 4-7 | Session ID | diff --git a/skyscraper8/Properties/launchSettings.json b/skyscraper8/Properties/launchSettings.json index 087bf5d..5a87411 100644 --- a/skyscraper8/Properties/launchSettings.json +++ b/skyscraper8/Properties/launchSettings.json @@ -2,7 +2,7 @@ "profiles": { "skyscraper8": { "commandName": "Project", - "commandLineArgs": "rotation-catalogue F:\\\\ F:\\\\rotate-us.csv", + "commandLineArgs": "\"C:\\devel\\skyscraper8\\skyscraper8\\bin\\Debug\\net8.0\\reuters-dde-10g.ts\"", "remoteDebugEnabled": false }, "Container (Dockerfile)": { diff --git a/skyscraper8/ReutersWne/ReutersWneExtractor.cs b/skyscraper8/ReutersWne/ReutersWneExtractor.cs index 59e8474..883969c 100644 --- a/skyscraper8/ReutersWne/ReutersWneExtractor.cs +++ b/skyscraper8/ReutersWne/ReutersWneExtractor.cs @@ -1,5 +1,6 @@ using log4net; using skyscraper5.Ietf.Rfc971; +using skyscraper5.Skyscraper.IO; using skyscraper5.Skyscraper.Plugins; namespace skyscraper8.ReutersWne; @@ -17,6 +18,8 @@ public class ReutersWneExtractor : ISkyscraperMpePlugin public 
void SetContext(DateTime? currentTime, object skyscraperContext) { //TODO: remember current time and skyscraper context + if (wneStories == null) + wneStories = new Dictionary(); } public bool CanHandlePacket(InternetHeader internetHeader, byte[] ipv4Packet) @@ -40,6 +43,9 @@ public class ReutersWneExtractor : ISkyscraperMpePlugin return true; } + private DirectoryInfo outputDirectory; + private int packetSerial = 0; + public void HandlePacket(InternetHeader internetHeader, byte[] ipv4Packet) { Span udpPayload = new Span(ipv4Packet,8,ipv4Packet.Length-8); @@ -51,19 +57,114 @@ public class ReutersWneExtractor : ISkyscraperMpePlugin byte msgFamily = udpPayload[1]; ushort length = udpPayload.ReadUInt16LittleEndian(2); uint sessionId = udpPayload.ReadUInt32LittleEndian(4); - - if (msgFamily == 0xff) - return; - if (length != udpPayload.Length && !(length == 16 && udpPayload.Length == 18) && msgFamily != 0xff) + if (length != udpPayload.Length && !(length == 16 && udpPayload.Length == 18)) { return; } - + + switch (msgFamily) + { + case 0x01: + ParsePacketType1(udpPayload); + break; + default: + OnError("Unknown packet type {0:X2}", msgFamily); + return; + } + + /*if (packetSerial < 1100) + { + + if (outputDirectory == null) + { + outputDirectory = new DirectoryInfo("wne_dump"); + outputDirectory.EnsureExists(); + } + string fname = string.Format("wne_dump/wne_{0:D4}.bin", packetSerial); + File.WriteAllBytes(fname, udpPayload.ToArray()); + packetSerial++; + }*/ + } + + private bool ParsePacketType1(Span udpPayload) + { + byte byte0 = udpPayload[0]; + if (byte0 != 0x00) + return false; + + byte msgFamily = udpPayload[1]; + ushort length = udpPayload.ReadUInt16LittleEndian(2); + uint sessionId = udpPayload.ReadUInt32LittleEndian(4); + uint fourthUint = udpPayload.ReadUInt32LittleEndian(12); + + if (fourthUint == 0) + { + if (length == 16) + { + //Empty packet, likely to be used to announce stories. 
+ if (!wneStories.ContainsKey(sessionId)) + { + _logger.InfoFormat("Found new WNE story #{0}", sessionId); + WneStory newStory = new WneStory(); + newStory.EccGroup = udpPayload[8]; + if (udpPayload[11] == 0x07) + newStory.ExpectedEccGroup = (byte)(newStory.EccGroup + 1); + wneStories.Add(sessionId, newStory); + return true; + } + else + { + WneStory currentStory = wneStories[sessionId]; + if (udpPayload[8] == currentStory.ExpectedEccGroup) + currentStory.timesSucessfullySynced++; + currentStory.EccGroup = udpPayload[8]; + if (udpPayload[11] == 0x07) + { + currentStory.ExpectedEccGroup = (byte)(udpPayload[8] + 1); + } + return true; + } + } + else + { + //A/V Payload + OnError("A/V Payloads not supported yet."); + return false; + } + } + else + { + OnError("Non-A/V Payloads not supported yet."); + return false; + } } public bool StopProcessingAfterThis() { return true; } + + private HashSet loggedErrors; + private Dictionary wneStories; + + private void OnError(string message, params object[] args) + { + if (loggedErrors == null) + loggedErrors = new HashSet(); + + string unpacked = String.Format(message, args); + if (!loggedErrors.Contains(unpacked)) + { + loggedErrors.Add(unpacked); + _logger.Warn(unpacked); + } + } + + private class WneStory + { + public byte EccGroup; + public byte ExpectedEccGroup; + public int timesSucessfullySynced; + } }