-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsearchlink2askwensen.py
More file actions
139 lines (105 loc) · 3.54 KB
/
searchlink2askwensen.py
File metadata and controls
139 lines (105 loc) · 3.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from bs4 import BeautifulSoup as BS
import urllib.request as urllib2
# NOTE(review): this region was an unresolved git merge conflict
# (<<<<<<< HEAD ... >>>>>>> d6453c7). Resolved in favor of the HEAD side,
# which carries the real implementation; the other side was an empty stub.
import datetime
import re


def calcprice(filename):
    """Print "price,date" pairs extracted from each line of *filename*.

    Each line of the input file is expected to contain a price preceded by
    a run of spaces/'1's (matched by the legacy pattern below) and an
    ISO date (YYYY-MM-DD).

    Parameters:
        filename: path to the balance text file to parse.

    Errors opening/reading the file are printed, not raised (best effort).
    """
    try:
        with open(filename, 'r') as f:
            rows = f.read().split('\n')
        for row in rows:
            # BUG FIX: re.search returns a Match object (or None); the
            # original concatenated the Match objects directly with '+',
            # which raises TypeError. Extract .group(0) and skip rows
            # where either pattern is absent.
            price = re.search(r"(?: 1+)\d*\.?\d*", row)
            date = re.search(r"\d{4}-\d{2}-\d{2}", row)
            if price and date:
                print(price.group(0) + "," + date.group(0))
    except OSError as e:
        # Best effort: report unreadable/missing files instead of crashing.
        print(e)
# Legacy regex notes kept from the original author:
# (?<= {6})\d*\.?\d* regex
# ( )\1+
# (?<=( )\1+)\d*\.?\d* regex
# + means as many of the preceding letter as possible
#curl -X POST --data '{"method":"xdag_get_block_info", "params":["dfKdPEdqac23INOdR/juDDY1LKFRePFk"], "id":1}' localhost:16005
def search(url):
    """Fetch an XDAG explorer block page; extract its balance and linked blocks.

    Reads the module-level globals ``result`` (accumulated CSV rows) and
    ``dt`` (date string) that the ``__main__`` section sets up, and appends
    one "url,balance,date" row per Balance div found, both to ``result``
    and to ``result.csv`` on disk.

    Parameters:
        url: full explorer URL, e.g. "https://explorer.xdag.io/block/<addr>".

    Returns:
        (balance, store_list): the first balance string found ('' when the
        page is not a "Block as address" page or on any failure), and the
        list of linked block addresses scraped from the page.
    """
    global result
    global dt
    store_list = []
    balance_list = []
    try:
        html = urllib2.urlopen(url)
        soup = BS(html, 'html.parser')
        # Only "Block as address" pages carry a balance; transaction pages
        # must be skipped entirely.
        is_address_page = any(
            h4.text == 'Block as address' for h4 in soup.find_all('h4')
        )
        if not is_address_page:
            return ('', store_list)  # Block-as-transaction: nothing to record
        for tag in soup.find_all('a', href=True):
            if tag['href'].startswith("/block/"):
                store_list.append(tag['href'][7:])  # strip '/block/' prefix
        for tag in soup.find_all("div"):
            if tag.text == "Balance":
                balance_text = tag.find_next('span').text
                balance_list.append(balance_text)
                # BUG FIX: list.append() and file.write() take exactly one
                # argument; the original passed two positional args
                # (", dt") and raised TypeError on every balance hit.
                row = url + "," + balance_text + "," + dt
                result.append(row)
                # BUG FIX: 'a' appends and creates result.csv if missing;
                # the original 'r+' overwrote from position 0, failed when
                # the file was absent, and leaked the handle.
                with open("result.csv", 'a') as out:
                    out.write(row + '\n')
        if len(balance_list) == 0:
            balance = ''
        else:
            balance = balance_list[0]
        return (balance, store_list)
    except Exception:
        # Best effort: any network/parse failure yields an empty result so
        # the crawl loop in __main__ can continue.
        return ('', store_list)
#soup.find("th", text="Balance").find_next_sibling("td").text
#b.body.findAll(text=re.compile('Trump wins .+? uncertain future'))
#result=soup.body.findAll(text=re.compile('Balance</th>.+?</td>'))
#print (result.text)
'''elem =soup.findAll('td', text = re.compile(ur'Fixed text:(.*)', re.DOTALL), attrs = {'class': 'pos'})#('a', {'title': 'title here'})
# <th scope="row">Balance</th><td>0.950165999
elem[0].text'''
if __name__ == "__main__":  # Script entry point (skipped when imported).
    # NOTE(review): this section contained an unresolved git merge conflict
    # (<<<<<<< HEAD ... >>>>>>> d6453c7); resolved keeping the HEAD side's
    # calcprice call, consistent with keeping its implementation above.
    calcprice('dfk balance.txt')
    # Globals consumed by search(): today's date stamp and the CSV rows
    # accumulated across all crawled pages.
    dt = datetime.datetime.today().strftime('%Y-%m-%d')
    result = []
    # 'r+' so newly discovered addresses can be appended after the read.
    with open("addrlist.csv", 'r+') as f:
        rows = f.read().split('\n')
        # First pass: collect every block linked from the seed addresses.
        new_rows = []
        for row in rows:
            # e.g. G6jTFKRkFlKj67zIdOZJ4jMjuhCe6oOg is a BLOCK-as-address
            # that is known to fail; search() returns ('', []) on failure.
            (_, read_list) = search("https://explorer.xdag.io/block/" + row)
            for new_item in read_list:
                if new_item not in new_rows:
                    new_rows.append(new_item)
        print(rows)
        # Second pass: crawl the newly found blocks and persist any
        # addresses not already known.
        for row in new_rows:
            try:
                (_, read_list) = search(
                    "https://explorer.xdag.io/block/" + row)
                for new_item in read_list:
                    if new_item not in rows and new_item not in new_rows:
                        rows.append(new_item)
                        print("final:" + new_item)
                        f.write(new_item + '\n')
            except Exception:
                # Best effort crawl: skip addresses that fail to resolve.
                pass
    # To make sure the data is written to disk, use file.flush() followed by
    # os.fsync(file.fileno()) — the `with` block above closes (and flushes)
    # the file for us.