"""
Tested on:
Linux: CentOS 5.3
Python: 2.5
Hadoop: 0.20.1
HBase: 0.20.0
"""
from unittest import TestCase, main
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import ColumnDescriptor, Mutation, BatchMutation, NotFound
class HbaseWriter:
    """Write page contents into an HBase table through a Thrift client.

    Each page is stored in the ``contents:`` column of a row whose key is
    the page URL with its host labels reversed (see ``reverseUrl``).
    """

    def __init__(self, netloc, port, table="webpages"):
        """Open the Thrift connection and create ``table`` if it is missing.

        netloc -- host passed to ``TSocket.TSocket``
        port   -- port passed to ``TSocket.TSocket``
        table  -- name of the table that ``write`` stores rows in
        """
        self.tableName = table
        self.transport = TTransport.TBufferedTransport(
            TSocket.TSocket(netloc, port))
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        self.transport.open()
        tables = self.client.getTableNames()
        if self.tableName not in tables:
            self.__createTable()

    def __del__(self):
        """Close the transport, if one was ever created."""
        # __init__ can raise before self.transport is assigned; in that case
        # there is nothing to close and we must not raise from a finalizer.
        transport = getattr(self, "transport", None)
        if transport is not None:
            transport.close()

    def __createTable(self):
        """Create the table with one ``contents:`` column descriptor."""
        self.client.createTable(self.tableName, [
            ColumnDescriptor(name="contents:", maxVersions=1,
                             compression="BLOCK"),
        ])

    def reverseUrl(self, url):
        """Return ``url`` without its scheme and with the host labels reversed.

        Everything up to the last ``//`` is discarded, empty path segments
        (e.g. from a trailing slash) are dropped, and the dot-separated
        labels of the first remaining segment are reversed::

            "http://www.b.com/foo" -> "com.b.www/foo"

        Applying the function to its own output restores the host order.

        Raises IndexError when ``url`` contains no non-empty segment.
        """
        # Build real lists instead of relying on filter() returning one:
        # on Python 3 filter() yields an iterator that cannot be indexed.
        pieces = [piece for piece in url.split("//") if piece]
        link = pieces[-1]
        hops = [hop for hop in link.split("/") if hop]
        labels = hops[0].split(".")
        labels.reverse()
        hops[0] = ".".join(labels)
        return "/".join(hops)

    def write(self, url, content):
        """Store ``content`` in ``contents:`` under the reversed-URL row key."""
        row = self.reverseUrl(url)
        mutations = [Mutation(column="contents:", value=content)]
        self.client.mutateRow(self.tableName, row, mutations)
class TestHbaseWriter(TestCase):
    """Integration tests for HbaseWriter.

    Every test connects to the Thrift server at 192.168.0.1:9090 and works
    on a table named "test", which is dropped again in ``tearDown``.
    """

    def setUp(self):
        """Connect to the server; the writer creates the table if missing."""
        self.writer = HbaseWriter("192.168.0.1", 9090, "test")

    def tearDown(self):
        """Disable and delete the test table."""
        name = self.writer.tableName
        client = self.writer.client
        client.disableTable(name)
        client.deleteTable(name)

    def testReverseUrl(self):
        """reverseUrl strips the scheme and reverses the host labels."""
        reverse = self.writer.reverseUrl
        self.assertEqual(reverse("http://www.a.com"), "com.a.www")
        self.assertEqual(reverse("http://www.a.com/"), "com.a.www")
        self.assertEqual(reverse("http://a.com"), "com.a")
        self.assertEqual(reverse("http://www.b.com/foo"), "com.b.www/foo")
        self.assertEqual(reverse("aaa.bbb.ccc.com.cn/foo1/foo2"),
                         "cn.com.ccc.bbb.aaa/foo1/foo2")

    def testCreate(self):
        """The table exists and only has the expected column descriptors."""
        tableName = self.writer.tableName
        client = self.writer.client
        self.assertTrue(tableName in client.getTableNames())
        columns = {
            "contents": ColumnDescriptor(name="contents", maxVersions=1,
                                         compression="BLOCK"),
        }
        cds = client.getColumnDescriptors(tableName)
        # NOTE(review): the table is created with name "contents:" but the
        # expected key here is "contents" -- confirm which form the server
        # reports in ColumnDescriptor.name.
        for column in cds.values():
            self.assertTrue(column.name in columns)

    def testWrite(self):
        """Rows written through the writer come back from a full scan."""
        tableName = self.writer.tableName
        client = self.writer.client
        data = {"http://www.a.com": "com.a.www",
                "http://www.a.com/bbb": "com.a.www/bbb",
                "http://www.foo.com/foo": "foo"}
        for url, content in data.items():
            self.writer.write(url, content)
        seen = []
        scannerId = client.scannerOpen(tableName, "", ["contents:"])
        # Close the scanner even when an assertion fails mid-scan.
        try:
            while True:
                try:
                    result = client.scannerGet(scannerId)
                except NotFound:
                    # The scanner signals exhaustion by raising NotFound.
                    break
                row = result.row
                contents = result.columns["contents:"].value
                # Reversing the row key again restores the original host.
                url = "http://" + self.writer.reverseUrl(row)
                self.assertTrue(url in data)
                self.assertEqual(data[url], contents)
                seen.append(url)
        finally:
            client.scannerClose(scannerId)
        # Without this an empty scan would let the test pass vacuously.
        self.assertEqual(sorted(seen), sorted(data))
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    main()
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import ColumnDescriptor, Mutation, BatchMutation, NotFound
class HbaseWriter:
    """Write page contents into an HBase table through a Thrift client.

    Each page is stored in the ``contents:`` column of a row whose key is
    the page URL with its host labels reversed (see ``reverseUrl``).
    """

    # NOTE(review): a class named HbaseWriter is also defined earlier in
    # this file; consider keeping a single copy.

    def __init__(self, netloc, port, table="webpages"):
        """Open the Thrift connection and create ``table`` if it is missing.

        netloc -- host passed to ``TSocket.TSocket``
        port   -- port passed to ``TSocket.TSocket``
        table  -- name of the table that ``write`` stores rows in
        """
        self.tableName = table
        self.transport = TTransport.TBufferedTransport(
            TSocket.TSocket(netloc, port))
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        self.transport.open()
        tables = self.client.getTableNames()
        if self.tableName not in tables:
            self.__createTable()

    def __del__(self):
        """Close the transport, if one was ever created."""
        # __init__ can raise before self.transport is assigned; in that case
        # there is nothing to close and we must not raise from a finalizer.
        transport = getattr(self, "transport", None)
        if transport is not None:
            transport.close()

    def __createTable(self):
        """Create the table with one ``contents:`` column descriptor."""
        self.client.createTable(self.tableName, [
            ColumnDescriptor(name="contents:", maxVersions=1,
                             compression="BLOCK"),
        ])

    def reverseUrl(self, url):
        """Return ``url`` without its scheme and with the host labels reversed.

        Everything up to the last ``//`` is discarded, empty path segments
        (e.g. from a trailing slash) are dropped, and the dot-separated
        labels of the first remaining segment are reversed::

            "http://www.b.com/foo" -> "com.b.www/foo"

        Applying the function to its own output restores the host order.

        Raises IndexError when ``url`` contains no non-empty segment.
        """
        # Build real lists instead of relying on filter() returning one:
        # on Python 3 filter() yields an iterator that cannot be indexed.
        pieces = [piece for piece in url.split("//") if piece]
        link = pieces[-1]
        hops = [hop for hop in link.split("/") if hop]
        labels = hops[0].split(".")
        labels.reverse()
        hops[0] = ".".join(labels)
        return "/".join(hops)

    def write(self, url, content):
        """Store ``content`` in ``contents:`` under the reversed-URL row key."""
        row = self.reverseUrl(url)
        mutations = [Mutation(column="contents:", value=content)]
        self.client.mutateRow(self.tableName, row, mutations)
class TestHbaseWriter(TestCase):
    """Integration tests for HbaseWriter.

    Every test connects to the Thrift server at 192.168.0.1:9090 and works
    on a table named "test", which is dropped again in ``tearDown``.
    """

    # NOTE(review): a class named TestHbaseWriter is also defined earlier in
    # this file; consider keeping a single copy.

    def setUp(self):
        """Connect to the server; the writer creates the table if missing."""
        self.writer = HbaseWriter("192.168.0.1", 9090, "test")

    def tearDown(self):
        """Disable and delete the test table."""
        name = self.writer.tableName
        client = self.writer.client
        client.disableTable(name)
        client.deleteTable(name)

    def testReverseUrl(self):
        """reverseUrl strips the scheme and reverses the host labels."""
        reverse = self.writer.reverseUrl
        self.assertEqual(reverse("http://www.a.com"), "com.a.www")
        self.assertEqual(reverse("http://www.a.com/"), "com.a.www")
        self.assertEqual(reverse("http://a.com"), "com.a")
        self.assertEqual(reverse("http://www.b.com/foo"), "com.b.www/foo")
        self.assertEqual(reverse("aaa.bbb.ccc.com.cn/foo1/foo2"),
                         "cn.com.ccc.bbb.aaa/foo1/foo2")

    def testCreate(self):
        """The table exists and only has the expected column descriptors."""
        tableName = self.writer.tableName
        client = self.writer.client
        self.assertTrue(tableName in client.getTableNames())
        columns = {
            "contents": ColumnDescriptor(name="contents", maxVersions=1,
                                         compression="BLOCK"),
        }
        cds = client.getColumnDescriptors(tableName)
        # NOTE(review): the table is created with name "contents:" but the
        # expected key here is "contents" -- confirm which form the server
        # reports in ColumnDescriptor.name.
        for column in cds.values():
            self.assertTrue(column.name in columns)

    def testWrite(self):
        """Rows written through the writer come back from a full scan."""
        tableName = self.writer.tableName
        client = self.writer.client
        data = {"http://www.a.com": "com.a.www",
                "http://www.a.com/bbb": "com.a.www/bbb",
                "http://www.foo.com/foo": "foo"}
        for url, content in data.items():
            self.writer.write(url, content)
        seen = []
        scannerId = client.scannerOpen(tableName, "", ["contents:"])
        # Close the scanner even when an assertion fails mid-scan.
        try:
            while True:
                try:
                    result = client.scannerGet(scannerId)
                except NotFound:
                    # The scanner signals exhaustion by raising NotFound.
                    break
                row = result.row
                contents = result.columns["contents:"].value
                # Reversing the row key again restores the original host.
                url = "http://" + self.writer.reverseUrl(row)
                self.assertTrue(url in data)
                self.assertEqual(data[url], contents)
                seen.append(url)
        finally:
            client.scannerClose(scannerId)
        # Without this an empty scan would let the test pass vacuously.
        self.assertEqual(sorted(seen), sorted(data))
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    main()
In today’s world, marketers reach inside the home and attempt to figure out not what’s good for your daughter, because that is not their business, but what deep desires they can manipulate, stimulate and ostensibly satisfy in order to produce cold, hard cash.
Nothing changes your opinion of a friend so surely as success – yours or his.
Always bear in mind that your own resolution to succeed is more important than any one thing.
But the body is deeper than the soul and its secrets inscrutable.
If your success is not on your own terms, if it looks good to the world but does not feel good in your heart, it is not success at all.
They always say time changes things, but you actually have to change them yourself.
colcmlfu…
colcmlfu…
cbrwpvdk…
cbrwpvdk…