Example 12. Python API



Set up Python API


Unzip the DolphinDB Python API package to a directory on a computer with Python development environment. Make sure the file setup.py exists under the directory, and run the following command to finish setting up Python API.



python setup.py install



To update an installed Python API, we can use



python setup.py install --force




Python API functionalities


We can use the session method to create a DolphinDB connection object in Python, and the connect method to establish connections with DolphinDB databases.



import pandas as pd

import numpy as np

import dolphindb as ddb


conn=ddb.session()

conn.connect( 'localhost', 8848)



In the following example, we connect to a running local DolphinDB server with port number 8848.


For a DolphinDB connection object in Python, we provide the following methods:




>>> df = pd.DataFrame({'id': np.int32([1, 2, 3, 4, 3]), 'value':  np.double([7.8, 4.6, 5.1, 9.6, 0.1]), 'x': np.int32([5, 4, 3, 2, 1])})

>>> conn.upload({'t1': df})






>>> conn.run('wavg', [100, 60, 300], [1, 1.5, 2])

164.0





>>> df = pd.DataFrame({'id': np.int32([1, 2, 3, 4, 3]), 'value':  np.double([7.8, 4.6, 5.1, 9.6, 0.1]), 'x': np.int32([5, 4, 3, 2, 1])})

>>> conn.upload({'t1': df})

>>> conn.run('t1.value.avg()')

5.44


>>> conn.run('1 2 NULL')

array([  1.,   2.,  nan])


>>> z= conn.run("dict(1 2 3, `IBM`MSFT`GOOG)")

>>> z

{1: 'IBM', 2: 'MSFT', 3: 'GOOG'}

>>> z[1]

'IBM'


>>> conn.run('(1..6).reshape(3:2)')

(array([[1, 4],

      [2, 5],

      [3, 6]]), None, None)


>>> table_str = "n=20000\n"

>>> table_str += "t1=table(09:30:00+rand(18000, n) as timestamp, rand(100, n) as id, 100*(1+rand(100, n)) as qty, 5.0+rand(100.0,n) as price);\n"

>>> table_str += "select * from t1 where price>9"                                                  

>>> df = conn.run(table_str)

>>> df

     timestamp  id    qty       price

0      13:50:29   8    800   78.039556

1      09:45:13  39   6000   36.624369

2      09:34:39  45   5000   11.116828

3      11:54:26  26   8700   69.652368

4      12:07:52  20   9600   84.086700

...         ...  ..    ...         ...

19222  14:25:47  26   5500   78.223695

19223  13:08:36  49   7500  100.907622

19224  11:07:52  60   9500   57.600072

19225  11:47:36  31   9600   12.589440

19226  10:41:01  93    500   41.886152

[19227 rows x 4 columns]


>>> df.columns

Index([u'timestamp', u'id', u'qty', u'price'], dtype='object')





>>> dt = conn.table(data={'sym': ['A', 'B', 'B', 'A', 'A'], 'vol': [1, 3, 2, 5, 4], 'price': [16, 31, 28, 19, 22]})


>>> dt.tableName()

'Tb2e146a8'



Please note that we will get a different table name each time we use the tableName method, but this table name will remain the same in all the following examples.




>>> dt.showSQL()

'select price,sym,vol from Tb2e146a8'


>>> dt['price'].showSQL()

'select price from Tb2e146a8'


>>> dt.select('price').showSQL()

'select price from Tb2e146a8'


>>> dt.select("price").where(dt.price > 20).showSQL()

'select price from Tb2e146a8 where (price > 20)'


>>> dt.select("price").where(dt.price > 20).sort('price').showSQL()

'select price from Tb2e146a8 where (price > 20) order by price'


>>> dt.select(['sym','vol']).where(dt.price > 20).sort(['sym','vol desc']).showSQL()

'select sym,vol from Tb2e146a8 where (price > 20) order by sym,vol desc'


>>> dt[dt.price > 20].showSQL()

'select price,sym,vol from Tb2e146a8 where (price > 20)'





>>> dt.toDF()

  price sym  vol

0     16   A    1

1     31   B    3

2     28   B    2

3     19   A    5

4     22   A    4


>>> dt['price'].toDF()

  price

0     16

1     31

2     28

3     19

4     22


>>> dt.select('price').toDF()

  price

0     16

1     31

2     28

3     19

4     22


>>> dt.select("price").where(dt.price > 20).toDF()

  price

0     31

1     28

2     22


>>> dt.select("price").where(dt.price > 20).sort('price').toDF()

  price

0     22

1     28

2     31


>>> dt.select(['sym','vol']).where(dt.price > 20).sort(['sym','vol desc']).toDF()

 sym  vol

0   A    4

1   B    3

2   B    2


>>> dt[dt.price > 20].toDF()

  price sym  vol

0     31   B    3

1     28   B    2

2     22   A    4





>>> dt[dt.price > 20].groupby('sym').agg('sum').showSQL()

'select sum(price),sum(vol) from Tb2e146a8 where (price > 20) group by sym'


>>> dt[dt.price > 20].groupby('sym').agg('sum').toDF()

 sym  sum_price  sum_vol

0   B         59        5

1   A         22        4


>>> dt.groupby('sym').agg({'vol':[ddb.sum2]}).showSQL()

'select sum2(vol) from Tb2e146a8 group by sym'


>>> dt.groupby('sym').agg({'vol':[ddb.sum2]}).toDF()

 sym  sum2_vol

0   A        42

1   B        13


>>> dt.groupby('sym').agg2([ddb.wsum, ddb.wavg], [("price", "vol")]).showSQL()

'select wavg(price,vol),wsum(price,vol) from Tb2e146a8 group by sym'


>>> dt.groupby('sym').agg2([ddb.wsum, ddb.wavg], [("price", "vol")]).toDF()

 sym  wavg_price  wsum_price

0   A        19.9         199

1   B        29.8         149





>>> dt.contextby('sym').agg({'price':[ddb.sum]}).showSQL()

'select sum(price),sym from Tb2e146a8 context by sym'


>>> dt.contextby('sym').agg({'price':[ddb.sum]}).toDF()

  sum_price sym

0         57   A

1         57   A

2         57   A

3         59   B

4         59   B


>>> dt.contextby('sym').agg({'price':[ddb.cumsum]}).showSQL()

'select cumsum(price),sym from Tb2e146a8 context by sym'


>>> dt.contextby('sym').agg({'price':[ddb.cumsum]}).toDF()

  cumsum_price sym

0            16   A

1            35   A

2            57   A

3            31   B

4            59   B





>>> dt.pivotby(index='vol', column='sym', value='price').showSQL()

'select price from Tb2e146a8 pivot by vol,sym'


>>> dt.pivotby(index='vol', column='sym', value='price').toDF()

  vol     A     B

0    1  16.0   NaN

1    2   NaN  28.0

2    3   NaN  31.0

3    4  22.0   NaN

4    5  19.0   NaN





>>> dt1 = conn.table(data={'sym': ['AAPL', 'AAPL', 'AAPL', 'AMZN', 'AMZN'], 'date': ['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06'], 'price': [122, 125, 123, 756, 767]})

>>> dt2 = conn.table(data={'sym': ['AAPL', 'AAPL', 'AMZN', 'AMZN', 'AMZN'], 'date': ['2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-10'], 'vol': [1100, 1400, 1600, 2200, 1800]})


>>> dt1.toDF();

        date  price   sym

0  2017-01-02    122  AAPL

1  2017-01-03    125  AAPL

2  2017-01-04    123  AAPL

3  2017-01-05    756  AMZN

4  2017-01-06    767  AMZN


>>> dt2.toDF();

        date   sym   vol

0  2017-01-04  AAPL  1100

1  2017-01-05  AAPL  1400

2  2017-01-06  AMZN  1600

3  2017-01-07  AMZN  2200

4  2017-01-10  AMZN  1800


>>> dt1.merge(dt2, left_on=['sym','date'], right_on=['sym','date']).showSQL()

'select Tc288e78d.date as rhs_date,Tc288e78d.sym as rhs_sym,Tc288e78d.vol as rhs_vol,Td8323cd0.date as lhs_date,Td8323cd0.price as lhs_price,Td8323cd0.sym as lhs_sym from ej(Td8323cd0,Tc288e78d,`sym`date,`sym`date)'


>>> dt1.merge(dt2, left_on=['sym','date'], right_on=['sym','date']).toDF()

    lhs_date  lhs_price lhs_sym    rhs_date rhs_sym  rhs_vol

0  2017-01-04        123    AAPL  2017-01-04    AAPL     1100

1  2017-01-06        767    AMZN  2017-01-06    AMZN     1600


>>> dt1.merge(dt2, left_on=['sym','date'], right_on=['sym','date'], how='left').showSQL()

'select Tc288e78d.date as rhs_date,Tc288e78d.sym as rhs_sym,Tc288e78d.vol as rhs_vol,Td8323cd0.date as lhs_date,Td8323cd0.price as lhs_price,Td8323cd0.sym as lhs_sym from lj(Td8323cd0,Tc288e78d,`sym`date,`sym`date)'


>>> dt1.merge(dt2, left_on=['sym','date'], right_on=['sym','date'], how='left').toDF()

    lhs_date  lhs_price lhs_sym    rhs_date rhs_sym  rhs_vol

0  2017-01-02        122    AAPL                          NaN

1  2017-01-03        125    AAPL                          NaN

2  2017-01-04        123    AAPL  2017-01-04    AAPL   1100.0

3  2017-01-05        756    AMZN                          NaN

4  2017-01-06        767    AMZN  2017-01-06    AMZN   1600.0





>>> dt1 = conn.table(data={'id': ['A','A','A','B','B'], 'date': pd.to_datetime(['2017-02-06', '2017-02-08', '2017-02-10', '2017-02-07', '2017-02-09']), 'price': [22, 23, 20, 100, 102]})

>>> dt2 = conn.table(data={'id': ['A','A','B','B','B'], 'date': pd.to_datetime(['2017-02-07', '2017-02-10', '2017-02-07', '2017-02-08', '2017-02-10'])})


>>> dt1.toDF()

                           date id  price

0  2017.02.06T00:00:00.000000000  A     22

1  2017.02.08T00:00:00.000000000  A     23

2  2017.02.10T00:00:00.000000000  A     20

3  2017.02.07T00:00:00.000000000  B    100

4  2017.02.09T00:00:00.000000000  B    102


>>> dt2.toDF()

                           date id

0  2017.02.07T00:00:00.000000000  A

1  2017.02.10T00:00:00.000000000  A

2  2017.02.07T00:00:00.000000000  B

3  2017.02.08T00:00:00.000000000  B

4  2017.02.10T00:00:00.000000000  B


>>> dt2.merge_asof(dt1, on=['id','date']).showSQL()

'select T38907df4.date as lhs_date,T38907df4.id as lhs_id,T97b7d897.date as rhs_date,T97b7d897.price as rhs_price,T97b7d897.id as rhs_id from aj(T38907df4,T97b7d897,`id`date,`id`date)'


>>> dt2.merge_asof(dt1, on=['id','date']).toDF()

                       lhs_date lhs_id                       rhs_date  rhs_price rhs_id

0  2017.02.07T00:00:00.000000000      A  2017.02.06T00:00:00.000000000         22      A

1  2017.02.10T00:00:00.000000000      A  2017.02.10T00:00:00.000000000         20      A

2  2017.02.07T00:00:00.000000000      B  2017.02.07T00:00:00.000000000        100      B

3  2017.02.08T00:00:00.000000000      B  2017.02.07T00:00:00.000000000        100      B

4  2017.02.10T00:00:00.000000000      B  2017.02.09T00:00:00.000000000        102      B



Download source code here.