Auditing BitTorrent

One of the strengths of BitTorrent is that the primary data transfer protocol is entirely separate from the advertisement protocol. This separation has also created strain in two areas: discovering other users who have the data, and keeping accurate reports of the data that was transferred.

The first issue has received extensive development effort, culminating in many extensions to the protocol which purport to make it easier to find other users. These include distributed trackers, PEX, and DHT, among many others.

The second issue has been covered less thoroughly, since it is a problem that cannot fundamentally be solved due to the distributed nature of the system. There is no real way to verify the legitimacy of the statistics a client reports, since neither it nor any of the peers it has interacted with can be trusted.

One attempt to get a better sense of what is really going on is to create a client that actually interacts with the data transfer protocol, to verify that reported statistics are not entirely inaccurate. This client does not interact in the traditional way, but will infrequently connect to peers and ask them to send it data – which it can then use to estimate the bandwidth of that client. This knowledge, combined with knowledge of which clients have what portions of the data, will allow the client to estimate the interactions that are taking place within the swarm.

These estimates can then be checked against reported statistics to discover when a client is misreporting its statistics.

The code below is not finished. It completes the initial functions of peer discovery and connection, but is not able to successfully download or monitor peers. The primary focus of work will be to implement the encryption protocol which is now standard for torrent traffic, so that the client is able to interact successfully with most users.

[python]
# Standalone Torrent Auditor
#
import socket
import time
import sys
import getopt
import random
import benc
import binascii
import select
import hashlib
import urllib

# Initialize the UDP socket and the other global state describing this client.

# Client tag sent in the 'v' field of outgoing queries; evaluates to "AZ531".
client = "AZ" + str(0x05) + "31"

# Discover the local address used for outbound traffic by opening a throwaway
# TCP connection and reading back the socket's own name.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    s.connect(("msn.com", 80))
    myIP = s.getsockname()[0]
finally:
    # Release the probe socket even when the connection attempt fails.
    s.close()

myPort = 6886
UDPSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
UDPSocket.bind((myIP, myPort))

# Peer id: 20 random characters, each with a code point in 0-255.
myID = "".join(chr(random.randrange(0, 256)) for _ in range(20))

# Peer records (dicts with 'state', 'ip', 'port', 'ihash') shared by the
# functions below.
knownPeers = []

# Send a single raw UDP datagram.
def sendData(data, host, port):
    """Send `data` as one datagram to (host, port) through the shared UDP socket.

    data -- payload handed unchanged to the socket.
    host -- destination address.
    port -- destination port.
    """
    destination = (host, port)
    # The middle argument is the sendto() flags value; 0 means no flags.
    UDPSocket.sendto(data, 0, destination)

# Load and decode a .torrent file.
def readFile(filePath):
    """Read the file at `filePath` and return its bdecoded contents.

    filePath -- path of the bencoded .torrent file to load.

    Returns whatever benc.bdecode() produces for the file's bytes.
    Raises IOError if the file cannot be opened or read.
    """
    # Bencoded metadata is binary, so open in binary mode to stop any newline
    # translation from altering the bytes. The with-block closes the handle
    # even if reading fails.
    with open(filePath, 'rb') as f:
        data = f.read()
    return benc.bdecode(data)

# Register with the tracker to get an initial list of peers.
def register(torrent):
    """Announce to the torrent's tracker and return the peers it reports.

    torrent -- decoded .torrent metadata; its 'announce' and 'info' entries
               are used.

    Returns a list of peer dicts with keys 'state' (0, meaning uncontacted),
    'ip', 'port' (int) and 'ihash' (SHA-1 digest of the bencoded 'info' entry).
    Raises ValueError if the tracker's reply contains a 'failure reason'.
    """
    url = torrent['announce']
    ihash = hashlib.sha1(benc.bencode(torrent['info'])).digest()
    query = urllib.urlencode({'info_hash': ihash,
                              'peer_id': myID,
                              'port': myPort,
                              'uploaded': 0,
                              'downloaded': 0,
                              'left': 0,
                              'compact': 1,
                              'event': 'started'})
    # An announce URL may already carry a query string (for example a
    # passkey); extend it instead of adding a second '?'.
    url += ('&' if '?' in url else '?') + query

    trackerhandle = urllib.urlopen(url)
    try:
        trackerdata = trackerhandle.read()
    finally:
        # Close the HTTP handle even if reading the reply fails.
        trackerhandle.close()

    parseddata = benc.bdecode(trackerdata)
    if 'failure reason' in parseddata:
        # Report the tracker's own explanation rather than a bare KeyError.
        raise ValueError("tracker rejected announce: %s"
                         % (parseddata['failure reason'],))

    initialnodes = parseddata['peers']
    peers = []
    if isinstance(initialnodes, list):
        # Non-compact reply: a list of dictionaries with 'ip' and 'port'.
        for entry in initialnodes:
            peers.append({'state': 0, 'ip': entry['ip'], 'ihash': ihash,
                          'port': entry['port']})
        return peers

    # Compact reply: 6 bytes per peer, a 4-byte IPv4 address followed by a
    # 2-byte big-endian port. A trailing partial record is ignored.
    for offset in range(0, len(initialnodes) - 5, 6):
        ip = initialnodes[offset:offset + 4]
        port = initialnodes[offset + 4:offset + 6]
        peers.append({'state': 0, 'ip': socket.inet_ntoa(ip), 'ihash': ihash,
                      'port': ord(port[0]) * 256 + ord(port[1])})
    return peers

def AnnouncePeer(myID, key, token, lp, host, port):
    """Bencode an 'announce_peer' query and send it to (host, port).

    myID  -- value placed in the query's 'id' argument.
    key   -- value placed in the 'info_hash' argument.
    token -- value placed in the 'token' argument.
    lp    -- value placed in the 'port' argument.
    host, port -- destination of the datagram.

    NOTE(review): the q/a/y/t/v layout looks like a DHT (KRPC) query --
    confirm against BEP 5 before relying on it.
    """
    data = {'q': 'announce_peer',
            'a': {'id': myID, 'info_hash': key, 'token': token, 'port': lp},
            'v': client,
            'y': 'q',
            # Transaction id; evaluates to the two-character string "55".
            't': str(0x05) + str(0x05)}
    sendData(benc.bencode(data), host, port)

def parseQuery():
    """Receive one datagram from the UDP socket and record who sent it.

    A known peer (matching ip and port) gets the "responded" bit (2) set in
    its 'state'. An unknown sender is appended to knownPeers with state 2.
    The raw message is printed once. Returns None.
    """
    (msg, (hn, hp)) = UDPSocket.recvfrom(4096)  # should be more than enough
    found = False
    for p in knownPeers:
        if p['ip'] == hn and p['port'] == hp:
            found = True
            # OR the bit in. The previous `&= 2` turned a contacted peer
            # (state 1) back into 0, which initiateConns treats as
            # uncontacted, so responding peers were contacted again forever.
            p['state'] |= 2
    if not found:
        knownPeers.append({'state': 2, 'ip': hn, 'port': hp, 'ihash': 0})
    print(msg)

    # TODO: unfinished sketch for decoding the reply; left disabled.
    #data = benc.bdecode(msg);

    #check the type of message here, maybe
    #hisid = data['r']['id'];
    #nodes = data['r']['nodes'];
    #l = len(nodes)/26;
    #for i in range(0,l):
    # nid = nodes[(26*i):(26*i+20)];
    # nhost = nodes[(26*i+20):(26*i+24)];
    # nport = nodes[(26*i+24):(26*i+26)];
    # knownHosts[nid]=socket.inet_ntoa(nhost);
    # knownPorts[nid]=ord(nport[0])*256+ord(nport[1]);
    # if bitdif(nid,targetID) < bitdif(hisid,targetID):
    # FindNodeReq(myID,targetID,knownHosts[nid],knownPorts[nid]);
    #knownHosts[hisid] = hn;
    #knownPorts[hisid] = int(hp);
    #return hisid;

def initiateConns(maxNew=5):
    """Send a BitTorrent handshake to uncontacted peers.

    maxNew -- maximum number of peers to contact in one call (default 5).

    Every peer in knownPeers whose 'state' is 0 is sent a handshake and moved
    to state 1 (contacted), until `maxNew` peers have been handled.

    Returns True when no peer was contacted (nothing left to do), otherwise
    False.
    """
    # Handshake prefix: one length byte with value 19, the 19-character
    # protocol name, then eight zero bytes for the reserved field. The earlier
    # str(0x19) / str(0x0)*8 produced the text "25" and "00000000" instead.
    prefix = chr(19) + 'BitTorrent protocol' + chr(0) * 8

    inited = 0
    for p in knownPeers:
        if inited >= maxNew:
            break
        if p['state'] != 0:  # already contacted or already heard from
            continue
        announce = prefix + p['ihash'] + myID
        p['state'] = 1  # contacted
        inited += 1
        # NOTE(review): sendData() emits a UDP datagram, whereas the peer wire
        # handshake is normally exchanged over a stream connection -- confirm
        # the intended transport.
        sendData(announce, p['ip'], p['port'])
    return inited == 0

def MainLoop():
    """Process incoming datagrams until there is nobody left to contact.

    Waits up to one second for the UDP socket to become readable. A readable
    socket is handed to parseQuery(). On a timeout initiateConns() is called,
    and the loop ends once it reports that no uncontacted peers remain.
    """
    # Progress marker without a newline, flushed so it shows before the loop
    # starts waiting.
    sys.stdout.write("Communicating ")
    sys.stdout.flush()
    while True:
        # Only readability matters; errors are ignored since it is all
        # datagrams.
        readable, _, _ = select.select([UDPSocket], [], [], 1)
        if readable:
            parseQuery()
        elif initiateConns():
            return

def usage():
    """Print the command-line usage summary to standard output."""
    print("Usage:")
    # The long option is spelled with two ASCII hyphens, matching the
    # "file=" long option that getopt is configured with.
    print("client --file=loc.torrent")
    print("Will report on statistics for the desired torrent")

# Write a progress label followed by a space, without a newline, and flush it
# so it is visible before the slow step that follows.
def _progress(label):
    sys.stdout.write(label + " ")
    sys.stdout.flush()


def main():
    """Parse the command line, query the tracker, run the loop and report.

    Options: -h/--help prints usage and exits; -f/--file=PATH selects the
    .torrent file (default "default.torrent"). An invalid option prints the
    getopt error and usage, then exits with status 2.
    """
    filePath = "default.torrent"
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hf:", ["help", "file="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)
    for o, a in opts:
        # getopt reports long options with two ASCII hyphens; the en-dash
        # spellings used before could never match.
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-f", "--file"):
            filePath = a
        else:
            # Raised explicitly so the check survives running with -O.
            raise AssertionError("unhandled option")

    _progress("Loading Info... ")
    info = readFile(filePath)
    print("okay")

    _progress("Detecting Swarm... ")
    seeds = register(info)
    print("%d %s" % (len(seeds), " peers returned"))
    knownPeers.extend(seeds)

    print("Entering Main Loop")
    MainLoop()
    print("Finished Snapshot")

    print("Discovered Swarm State:")
    for p in knownPeers:
        # One line per peer: address, optional 'has' value, then either the
        # measured 'speed' or a note that no measurement exists.
        fields = [p['ip'], ": "]
        if 'has' in p:
            fields.append(p['has'])
        fields.append(p['speed'] if 'speed' in p else "unconnectable")
        print(" ".join(str(field) for field in fields))

# Run the audit when executed as a script. The module-level UDP socket is
# closed afterwards in every case, including when main() raises or exits.
try:
    if __name__ == "__main__":
        main()
finally:
    UDPSocket.close()
[/python]