Valhalla Legends Forums Archive | General Programming | BitTorrent Bencoding

AuthorMessageTime
shout
I am trying to implement some basic BitTorrent functionality in a .NET DLL, and I'm not quite sure how to approach bencoding (see the bencoding section of http://wiki.theory.org/BitTorrentSpecification).

The Bencoding part is easy enough,
[code]
namespace Stuff.Messages
{
    /// <summary>
    /// Builds a bencoded message piece by piece. Callers append strings, integers and
    /// list/dictionary markers in wire order, then fetch the result as text or as ASCII bytes.
    /// </summary>
    /// <remarks>
    /// Nesting is not validated: every StartList/StartDictionary call must be matched by the
    /// caller with a call to End.
    /// </remarks>
    public class MessageBencoder
    {
        // A StringBuilder keeps appends cheap; concatenating onto a string re-copies the
        // whole message on every insert.
        private readonly System.Text.StringBuilder m_message;

        /// <summary>Creates an encoder holding an empty message.</summary>
        public MessageBencoder()
        {
            m_message = new System.Text.StringBuilder();
        }

        /// <summary>Appends a bencoded string: decimal length, a colon, then the text.</summary>
        /// <param name="message">Text to append. Characters that are not representable in
        /// ASCII will not survive <see cref="GetEncoded"/>.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="message"/> is null.</exception>
        public void InsertString(string message)
        {
            if (message == null)
                throw new System.ArgumentNullException("message");

            // Invariant culture: the length prefix is wire data, not user-facing text.
            m_message.Append(message.Length.ToString(System.Globalization.CultureInfo.InvariantCulture));
            m_message.Append(':');
            m_message.Append(message);
        }

        /// <summary>Appends a bencoded integer: 'i', the decimal digits, 'e'.</summary>
        /// <param name="message">Value to append; negative values are written with a leading '-'.</param>
        public void InsertInt(int message)
        {
            // Invariant culture guarantees ASCII digits and a plain '-' sign regardless of
            // the current thread's regional settings.
            m_message.Append('i');
            m_message.Append(message.ToString(System.Globalization.CultureInfo.InvariantCulture));
            m_message.Append('e');
        }

        /// <summary>Appends the list-start marker 'l'.</summary>
        public void StartList()
        {
            m_message.Append('l');
        }

        /// <summary>Appends the dictionary-start marker 'd'.</summary>
        public void StartDictionary()
        {
            m_message.Append('d');
        }

        /// <summary>Appends the end marker 'e' that closes a list or dictionary.</summary>
        public void End()
        {
            m_message.Append('e');
        }

        /// <summary>Returns everything appended so far as a string.</summary>
        public string Get()
        {
            return m_message.ToString();
        }

        /// <summary>Returns everything appended so far, encoded as ASCII bytes.</summary>
        public byte[] GetEncoded()
        {
            return System.Text.ASCIIEncoding.ASCII.GetBytes(m_message.ToString());
        }
    }
}
[/code]

But I don't know how to approach the decoding... This is what I came up with, but it's messy.

[code]
/// <summary>
/// Forward-only reader that pulls bencoded tokens out of a byte array.
/// </summary>
/// <remarks>
/// <see cref="Next()"/> reads one type byte from the stream every time it is called;
/// <see cref="GetNext"/> then decodes the token that byte introduced.
/// </remarks>
public class MessageDebencoder
    {
        private MemoryStream m_message;
        // Type byte most recently pulled off the stream by Next().
        private byte tmpConsumed;
        // True when Next() has read tmpConsumed and GetNext() has not used it yet.
        private bool updated;

        /// <summary>Creates a decoder positioned at the start of <paramref name="message"/>.</summary>
        /// <exception cref="System.ArgumentNullException"><paramref name="message"/> is null.</exception>
        public MessageDebencoder(byte[] message)
        {
            if (message == null)
                throw new System.ArgumentNullException("message");
            m_message = new MemoryStream(message);
        }

        /// <summary>True while unread bytes remain in the stream.</summary>
        public bool More
        {
            // Compare against the read position; the total length alone never changes.
            get { return m_message.Position < m_message.Length; }
        }

        /// <summary>Reads the next type byte from the stream and classifies it.</summary>
        /// <exception cref="System.IO.EndOfStreamException">The stream is exhausted.</exception>
        public MessageTypes Next()
        {
            int raw = m_message.ReadByte();
            if (raw < 0)
                throw new System.IO.EndOfStreamException("No more bencoded data to read.");
            tmpConsumed = (byte)raw;
            updated = true;
            return Next(tmpConsumed);
        }

        /// <summary>Classifies a type byte without touching the stream.</summary>
        /// <param name="n">Lead byte of a token.</param>
        /// <returns>The token kind; any byte other than 'd', 'l', 'i' or 'e' is reported as a string.</returns>
        public MessageTypes Next(byte n)
        {
            switch (n)
            {
                case (byte)'d':
                    return MessageTypes.MT_StartDictionary;
                case (byte)'l':
                    return MessageTypes.MT_StartList;
                case (byte)'i':
                    return MessageTypes.MT_Int;
                case (byte)'e':
                    return MessageTypes.MT_End;
                default:
                    return MessageTypes.MT_String;
            }
        }

        /// <summary>
        /// Decodes the next token, reusing the type byte from a preceding <see cref="Next()"/>
        /// call when there is one.
        /// </summary>
        /// <returns>A boxed int for an integer, a string for a string, or the boxed
        /// <see cref="MessageTypes"/> marker for a list start, dictionary start or end.</returns>
        /// <exception cref="System.FormatException">The token is malformed.</exception>
        /// <exception cref="System.IO.EndOfStreamException">The data ends in the middle of a token.</exception>
        public object GetNext()
        {
            MessageTypes type = updated ? Next(tmpConsumed) : Next();
            updated = false;
            switch (type)
            {
                case MessageTypes.MT_Int:
                    return (object)ReadIntegerBody();

                case MessageTypes.MT_String:
                    return (object)ASCIIEncoding.ASCII.GetString(ReadStringBody(tmpConsumed));

                case MessageTypes.MT_StartList:
                case MessageTypes.MT_StartDictionary:
                case MessageTypes.MT_End:
                    return (object)type;
            }
            return null;
        }

        // Reads the digits of an integer token up to and including the closing 'e'.
        private int ReadIntegerBody()
        {
            System.Text.StringBuilder digits = new System.Text.StringBuilder();
            while (true)
            {
                int raw = m_message.ReadByte();
                if (raw < 0)
                    throw new System.IO.EndOfStreamException("Bencoded integer is missing its terminating 'e'.");
                if (raw == 'e')
                    break;
                // A '-' is only legal as the very first character; it must be kept,
                // otherwise negative values silently turn positive.
                bool isSign = (raw == '-' && digits.Length == 0);
                if (!isSign && (raw < '0' || raw > '9'))
                    throw new System.FormatException("Unexpected byte 0x" + raw.ToString("X2") + " inside a bencoded integer.");
                digits.Append((char)raw);
            }
            return int.Parse(digits.ToString(),
                             System.Globalization.NumberStyles.AllowLeadingSign,
                             System.Globalization.CultureInfo.InvariantCulture);
        }

        // Reads "<length>:<payload>" where the first length digit was already consumed as
        // the type byte, and returns the raw payload bytes.
        private byte[] ReadStringBody(byte firstLengthDigit)
        {
            System.Text.StringBuilder lengthText = new System.Text.StringBuilder();
            int raw = firstLengthDigit;
            while (raw != ':')
            {
                if (raw < '0' || raw > '9')
                    throw new System.FormatException("Unexpected byte 0x" + raw.ToString("X2") + " in a bencoded string length.");
                lengthText.Append((char)raw);
                raw = m_message.ReadByte();
                if (raw < 0)
                    throw new System.IO.EndOfStreamException("Bencoded string length is missing its ':' separator.");
            }

            int len = int.Parse(lengthText.ToString(),
                                System.Globalization.NumberStyles.None,
                                System.Globalization.CultureInfo.InvariantCulture);

            // Check before allocating so a bogus length cannot force a huge allocation.
            if (len > m_message.Length - m_message.Position)
                throw new System.IO.EndOfStreamException("Bencoded string is shorter than its declared length.");

            byte[] payload = new byte[len];
            int filled = 0;
            while (filled < len)
            {
                int got = m_message.Read(payload, filled, len - filled);
                if (got <= 0)
                    throw new System.IO.EndOfStreamException("Bencoded string is shorter than its declared length.");
                filled += got;
            }
            return payload;
        }
    }

    public enum MessageTypes
    {
        MT_String = 0,
        MT_Int = 1,
        MT_StartList = 2,
        MT_StartDictionary = 3,
        MT_End = 4,
        MT_ByteString = 5
    };
[/code]

Any suggestions on a better way to implement this?
July 5, 2006, 3:34 PM
Myndfyr
Well you're going to run into problems whenever a value that *should* be greater than 0x7f is put into your string, since Encoding.ASCII won't translate larger than that.  It's a good argument for not using strings as buffers.  The other argument for it is that since strings are immutable, you're forcing additional garbage collection because every time you append the string, you're generating a new string and making the runtime clean up the old one.

If you like, I'm sure that my databuffer's implementation can be adjusted, as can my datareader (which is designed to read from databuffer-made packets).  The nice thing is, if you need to adjust your packet encoding or something like that, you can do so without breaking the interface.  Most of the functions are already there for you (for example, your Next() function could be implemented by doing a ReadByte(), converting the result to a char, and then calling the appropriate function such as ReadInt32()). 

In any case, using a string as a buffer for binary data is both problematic and bad OO practice.  There have been other problems noted because of this, and it should be avoided when possible.
July 5, 2006, 4:28 PM
shout
[quote author=MyndFyre[vL] link=topic=15340.msg155339#msg155339 date=1152116889]
Well you're going to run into problems whenever a value that *should* be greater than 0x7f is put into your string, since Encoding.ASCII won't translate larger than that.  It's a good argument for not using strings as buffers.  The other argument for it is that since strings are immutable, you're forcing additional garbage collection because every time you append the string, you're generating a new string and making the runtime clean up the old one.

If you like, I'm sure that my databuffer's implementation can be adjusted, as can my datareader (which is designed to read from databuffer-made packets).  The nice thing is, if you need to adjust your packet encoding or something like that, you can do so without breaking the interface.  Most of the functions are already there for you (for example, your Next() function could be implemented by doing a ReadByte(), converting the result to a char, and then calling the appropriate function such as ReadInt32()). 

In any case, using a string as a buffer for binary data is both problematic and bad OO practice.  There have been other problems noted because of this, and it should be avoided when possible.
[/quote]

I know the string buffer is impractical, that will be changed. This is not binary data. It is textual ASCII encoded data. BitTorrent uses textual data for everything except peer <-> peer transfer.

[code]
d10:im so leeti1337eel4:leeti1337e4:l33te
[/code]
Unencoded would be:
[code]
Dictionary { "im so leet"  : 1337 } List { "leet", 1337, "l33t" }
[/code]

The annoying thing about it is that there is no set format for what comes first, second, etc.
July 6, 2006, 5:30 AM
shout
MyndFyre was right again!

Meh... this works... just not too well.

If anyone wants to comment on the rotten fruits of my labor, go ahead. :) There's some trash that still needs to be taken care of.

[code]
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;

namespace BitTorrent.Messages
{
    /// <summary>
    /// Forward-only reader for a bencoded message held in a <see cref="MemoryStream"/>.
    /// </summary>
    /// <remarks>
    /// <see cref="Next()"/> peeks at the type of the upcoming token and may be called
    /// repeatedly; <see cref="GetNext"/> (or one of the typed helpers) consumes the token.
    /// </remarks>
    public class MessageDebencoder
    {
        private MemoryStream m_message;
        // Type byte most recently pulled off the stream by Next().
        private byte tmpConsumed;
        // True while tmpConsumed has been read from the stream but not yet consumed by GetNext().
        private bool updated;
#if DEBUG
        /// <summary>The whole message rendered as ASCII text, for debugger display.</summary>
        public string dbgDisplay
        {
            // ToArray works even when the stream's buffer is not publicly visible.
            get { return ASCIIEncoding.ASCII.GetString(m_message.ToArray()); }
        }
#endif

        /// <summary>Creates a decoder positioned at the start of <paramref name="message"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="message"/> is null.</exception>
        public MessageDebencoder(byte[] message)
        {
            if (message == null)
                throw new ArgumentNullException("message");
            m_message = new MemoryStream(message, 0, message.Length, false, true);
        }

        /// <summary>
        /// Creates a decoder that reads from <paramref name="message"/>, starting at the
        /// stream's current position. The stream is shared with the caller, not copied.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="message"/> is null.</exception>
        public MessageDebencoder(MemoryStream message)
        {
            if (message == null)
                throw new ArgumentNullException("message");
            m_message = message;
        }

        /// <summary>
        /// True while unread bytes remain in the stream. A type byte that has already been
        /// peeked with <see cref="Next()"/> is not counted.
        /// </summary>
        public bool More
        {
            get { return (m_message.Length - m_message.Position > 0); }
        }

        /// <summary>
        /// Reports the type of the upcoming token. Calling it again before
        /// <see cref="GetNext"/> returns the same answer without reading further.
        /// </summary>
        /// <exception cref="EndOfStreamException">The stream is exhausted.</exception>
        public MessageTypes Next()
        {
            if (!updated)
            {
                int raw = m_message.ReadByte();
                // ReadByte signals end of stream with -1; casting that to byte would
                // masquerade as a 0xFF "string" token.
                if (raw < 0)
                    throw new EndOfStreamException("No more bencoded data to read.");
                tmpConsumed = (byte)raw;
                updated = true;
            }
            return Next(tmpConsumed);
        }

        // Classifies a type byte; anything other than 'd', 'l', 'i' or 'e' is treated as
        // the first length digit of a string.
        private MessageTypes Next(byte n)
        {
            switch (n)
            {
                case (byte)'d':
                    return MessageTypes.MT_StartDictionary;
                case (byte)'l':
                    return MessageTypes.MT_StartList;
                case (byte)'i':
                    return MessageTypes.MT_Int;
                case (byte)'e':
                    return MessageTypes.MT_End;
                default:
                    return MessageTypes.MT_String;
            }
        }

        /// <summary>
        /// Reads a dictionary key and returns the value that follows it in its
        /// still-bencoded form.
        /// </summary>
        /// <param name="key">Receives the key decoded as ASCII text.</param>
        /// <param name="encoded_value">Receives the raw bencoded bytes of the value,
        /// including any nested lists or dictionaries.</param>
        /// <exception cref="InvalidCastException">The next token is not a string.</exception>
        /// <exception cref="EndOfStreamException">The data ends before the value is complete.</exception>
        public void GetNextDictionaryPair(out string key, out byte[] encoded_value)
        {
            byte[] bkey = (byte[])GetNext();
            key = ASCIIEncoding.ASCII.GetString(bkey);
            long start_position = m_message.Position;
            SkipValue();
            encoded_value = Capture(start_position);
        }

        /// <summary>
        /// Consumes the next complete value (normally a list) and returns its raw
        /// bencoded bytes, opening marker included.
        /// </summary>
        /// <exception cref="EndOfStreamException">The data ends before the value is complete.</exception>
        public byte[] GetNextList()
        {
            // If the caller already peeked the opening marker with Next(), that byte has
            // been read from the stream, so the value really starts one byte earlier.
            long start_position = m_message.Position - (updated ? 1 : 0);
            SkipValue();
            return Capture(start_position);
        }

        /// <summary>Consumes the next token as a string and decodes it as ASCII.</summary>
        /// <exception cref="InvalidCastException">The next token is not a string.</exception>
        public string GetNextString()
        {
            return ASCIIEncoding.ASCII.GetString((byte[])GetNext());
        }

        /// <summary>Consumes the next token as an integer.</summary>
        /// <exception cref="InvalidCastException">The next token is not an integer.</exception>
        public int GetNextInt()
        {
            return (int)GetNext();
        }

        /// <summary>Consumes the next token.</summary>
        /// <returns>A boxed int for an integer, a byte array for a string, or the boxed
        /// <see cref="MessageTypes"/> marker for a list start, dictionary start or end.</returns>
        /// <exception cref="FormatException">The token is malformed.</exception>
        /// <exception cref="EndOfStreamException">The data ends in the middle of a token.</exception>
        /// <exception cref="OverflowException">An integer does not fit in 32 bits.</exception>
        public object GetNext()
        {
            MessageTypes message = Next();   // reuses the peeked type byte when there is one
            updated = false;
            switch (message)
            {
                case MessageTypes.MT_Int:
                    return (object)ReadIntegerBody();

                case MessageTypes.MT_String:
                    return (object)ReadStringBody(tmpConsumed);

                case MessageTypes.MT_StartList:
                    return (object)(MessageTypes.MT_StartList);
                case MessageTypes.MT_StartDictionary:
                    return (object)(MessageTypes.MT_StartDictionary);
                case MessageTypes.MT_End:
                    return (object)(MessageTypes.MT_End);
            }
            return null;
        }

        // Consumes one complete value: a single scalar, or a list/dictionary together
        // with everything nested inside it.
        private void SkipValue()
        {
            int level = 0;
            do
            {
                MessageTypes type = Next();
                if (type == MessageTypes.MT_StartDictionary || type == MessageTypes.MT_StartList)
                    level++;
                else if (type == MessageTypes.MT_End)
                    level--;
                GetNext();
            } while (level > 0 && More);

            // Running out of data with containers still open means the message was cut short.
            if (level > 0)
                throw new EndOfStreamException("Bencoded list or dictionary is missing its terminating 'e'.");
        }

        // Returns a copy of the bytes between start and the current position, leaving the
        // position where it was. Going through the stream instead of its backing array keeps
        // this correct for streams with an origin offset or a non-exposed buffer.
        private byte[] Capture(long start)
        {
            long end = m_message.Position;
            byte[] slice = new byte[(int)(end - start)];
            m_message.Position = start;
            Fill(slice);
            return slice;
        }

        // Reads exactly target.Length bytes or fails.
        private void Fill(byte[] target)
        {
            int filled = 0;
            while (filled < target.Length)
            {
                int got = m_message.Read(target, filled, target.Length - filled);
                if (got <= 0)
                    throw new EndOfStreamException("Bencoded data ended unexpectedly.");
                filled += got;
            }
        }

        // Reads the digits of an integer token up to and including the closing 'e'.
        private int ReadIntegerBody()
        {
            StringBuilder digits = new StringBuilder();
            while (true)
            {
                int raw = m_message.ReadByte();
                if (raw < 0)
                    throw new EndOfStreamException("Bencoded integer is missing its terminating 'e'.");
                if (raw == 'e')
                    break;
                // A '-' is only legal as the very first character; it must be kept,
                // otherwise negative values silently turn positive.
                bool isSign = (raw == '-' && digits.Length == 0);
                if (!isSign && (raw < '0' || raw > '9'))
                    throw new FormatException("Unexpected byte 0x" + raw.ToString("X2") + " inside a bencoded integer.");
                digits.Append((char)raw);
            }
            return int.Parse(digits.ToString(),
                             System.Globalization.NumberStyles.AllowLeadingSign,
                             System.Globalization.CultureInfo.InvariantCulture);
        }

        // Reads "<length>:<payload>" where the first length digit was already consumed as
        // the type byte, and returns the raw payload bytes.
        private byte[] ReadStringBody(byte firstLengthDigit)
        {
            StringBuilder lengthText = new StringBuilder();
            int raw = firstLengthDigit;
            while (raw != ':')
            {
                if (raw < '0' || raw > '9')
                    throw new FormatException("Unexpected byte 0x" + raw.ToString("X2") + " in a bencoded string length.");
                lengthText.Append((char)raw);
                raw = m_message.ReadByte();
                if (raw < 0)
                    throw new EndOfStreamException("Bencoded string length is missing its ':' separator.");
            }

            int len = int.Parse(lengthText.ToString(),
                                System.Globalization.NumberStyles.None,
                                System.Globalization.CultureInfo.InvariantCulture);

            // Check before allocating so a bogus length cannot force a huge allocation.
            if (len > m_message.Length - m_message.Position)
                throw new EndOfStreamException("Bencoded string is shorter than its declared length.");

            byte[] payload = new byte[len];
            Fill(payload);
            return payload;
        }
    }

    /// <summary>Kinds of token a bencoded message can contain.</summary>
    public enum MessageTypes
    {
        /// <summary>A length-prefixed string; also reported for any lead byte that is not 'd', 'l', 'i' or 'e'.</summary>
        MT_String,
        /// <summary>An integer token, introduced by 'i'.</summary>
        MT_Int,
        /// <summary>Start of a list, introduced by 'l'.</summary>
        MT_StartList,
        /// <summary>Start of a dictionary, introduced by 'd'.</summary>
        MT_StartDictionary,
        /// <summary>End of a list or dictionary, the byte 'e'.</summary>
        MT_End,
        /// <summary>Never returned by MessageDebencoder; presumably reserved for raw byte strings (TODO confirm).</summary>
        MT_ByteString
    };
}
[/code]
July 7, 2006, 3:05 AM

Search