def login(username, password):
    """Log in to renren.com and return the uid of the logged-in user.

    The credentials are posted to the ajax login endpoint through a
    cookie-aware opener. That opener is installed globally, so the later
    ``urllib2.urlopen`` call (and any made afterwards by other code) reuses
    the session cookies. The uid is then read out of the home page.

    Args:
        username: value sent as the ``email`` form field.
        password: value sent as the ``password`` form field.

    Returns:
        The uid as a string of digits, as captured from the home page.

    Raises:
        ValueError: if the home page contains no ``'ruid':'<digits>'``
            marker (presumably because the login did not succeed).
    """
    # NOTE(review): the credentials are posted to a plain http:// URL.
    logpage = "http://www.renren.com/ajaxLogin/login"
    login_data = urllib.urlencode({'email': username, 'password': password})

    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    # Installed globally so the urlopen() call below shares the cookie jar.
    urllib2.install_opener(opener)

    res = opener.open(logpage, login_data)
    print("Login now ...")
    res.read()  # drain the login response; its content is not used

    # Get uid
    print("Getting user id of you now")
    html = urllib2.urlopen("http://www.renren.com/home").read()
    match = re.search(r"'ruid':'(\d+)'", html)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("could not find 'ruid' in the renren home page; "
                         "login probably failed")
    uid = match.group(1)
    print("Login and got uid successfully")
    return uid
m = re.findall(pattern, html) #print len(m) if len(m) == 0: break for i in range(0, len(m)): no = m[i][0] uname = m[i][1] #print uname, no dict1[no] = uname pagenum += 1 print"Got %s 's friends list successfully." % str(uid) return dict1
我们再写一个获取好友关系字典的函数，并把结果缓存到磁盘，避免每次获取字典都要重新登录抓取。
1 2 3 4 5 6 7 8 9 10
def getdict(uid):
    """Return the friend dict of ``uid``, cached on disk in ``<uid>.txt``.

    The cache file is loaded with ``p.load`` (``p`` is presumably the pickle
    module imported under that alias -- confirm at the top of the file). If
    the file cannot be opened or loaded, the dict is fetched with
    ``getfriends(uid)``, written to the cache file and returned.

    Args:
        uid: user id; ``str(uid) + '.txt'`` is used as the cache file name.

    Returns:
        The dict loaded from the cache, or the freshly fetched one.
    """
    cache_path = str(uid) + '.txt'
    try:
        # NOTE(review): loading a pickle executes whatever the file encodes;
        # only safe as long as the cache files are written by this script.
        with open(cache_path, 'r') as f:
            dict_uid = p.load(f)
    except Exception:
        # Cache missing or unreadable: fetch first, so that a failing fetch
        # does not leave an empty cache file behind, then store the result.
        dict_uid = getfriends(uid)
        with open(cache_path, 'w') as f:
            p.dump(dict_uid, f)
    return dict_uid
我们还需要一个函数来判断两个人是否互为好友，用它来确定我们好友之间的关系。
1 2 3 4 5 6 7
def getrelations(uid1, uid2):
    """Tell whether ``uid2`` is listed in the friend dict of ``uid1``.

    Args:
        uid1: user id whose friend dict is looked up via ``getdict``.
        uid2: user id searched for among the keys of that dict.

    Returns:
        1 if ``uid2`` is a key of ``getdict(uid1)``, otherwise 0.
    """
    friends_of_uid1 = getdict(uid1)
    return 1 if uid2 in friends_of_uid1 else 0
def getgraph(username, password):
    """Build and return the friendship graph of the logged-in user.

    Logs in, loads the user's own friend dict, adds one node per friend
    (labelled with the friend's name) and connects two friends whenever the
    second one appears in the first one's friend dict.

    You must specify a Chinese font such as `SimHei` in
    ~/.matplotlib/matplotlibrc if you want Chinese labels to be drawn.

    Args:
        username: account name passed to ``login``.
        password: password passed to ``login``.

    Returns:
        The ``nx.Graph`` whose nodes are the decoded friend names.
    """
    uid = login(username, password)
    dict_root = getdict(uid)  # Get root tree

    # Encode Chinese characters for matplotlib **IMPORTANT**
    # Names are decoded once here instead of once per pair of friends.
    friends = [(fid, unicode(fname, 'utf8'))
               for fid, fname in dict_root.items()]

    G = nx.Graph()  # Create a Graph object
    for uid1, uname1 in friends:
        G.add_node(uname1)
        # Load this friend's dict once rather than once per candidate pair;
        # the membership test is the same one getrelations() performs.
        friends_of_uid1 = getdict(uid1)
        for uid2, uname2 in friends:
            if uid2 == uid1:
                continue
            if uid2 in friends_of_uid1:
                G.add_edge(uname1, uname2)

    # The caller uses the graph, so it has to be handed back.
    return G
def draw_graph(username, password, filename='graph.txt', label_flag=True,
               remove_isolated=True, different_size=True, iso_level=10,
               node_size=40):
    """Load (or build and cache) the friendship graph and draw it.

    The graph is read from ``filename`` with ``p.load``. If that fails, it is
    rebuilt with ``getgraph(username, password)`` and written to ``filename``.
    The graph is then optionally filtered and drawn with a spring layout, and
    the matplotlib window is shown.

    Args:
        username: account name, used only when the graph must be rebuilt.
        password: password, used only when the graph must be rebuilt.
        filename: cache file holding the serialized graph.
        label_flag: if true, draw node labels.
        remove_isolated: if true, keep only connected components that have
            more than ``iso_level`` nodes.
        different_size: if true, size each node by ``degree * 10`` and ignore
            ``node_size``.
        iso_level: component size threshold used by ``remove_isolated``.
        node_size: node size used when ``different_size`` is false.

    The three flags are evaluated by truthiness, so ``1``/``0`` behave like
    ``True``/``False``.
    """
    print("Generating graph...")
    try:
        # NOTE(review): loading a pickle executes whatever the file encodes;
        # only safe as long as the cache file is written by this script.
        with open(filename, 'r') as f:
            G = p.load(f)
    except Exception:
        # Cache missing or unreadable: rebuild from the network and store it.
        G = getgraph(username, password)
        with open(filename, 'w') as f:
            p.dump(G, f)

    # Drop small components from the graph if requested.
    if remove_isolated:
        H = nx.empty_graph()
        for SG in nx.connected_component_subgraphs(G):
            if SG.number_of_nodes() > iso_level:
                H = nx.union(SG, H)
        G = H

    # Adjust the graph for better presentation: node size follows degree.
    if different_size:
        # dict() accepts both a mapping and an iterable of (node, degree).
        degree_of = dict(nx.degree(G))
        node_size = [degree_of[v] * 10 for v in G]

    pos = nx.spring_layout(G, iterations=50)
    nx.draw_networkx_edges(G, pos, alpha=0.2)
    nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color='r',
                           alpha=0.3)

    # Draw the labels if requested.
    if label_flag:
        nx.draw_networkx_labels(G, pos, alpha=0.5)

    plt.show()