# liyang005

# pymongo的基本操作 (Basic pymongo operations)

# To use MongoDB, first install it ($ sudo dnf install mongodb mongodb-service),
# then start the server with:
#   $ sudo service mongod start
# On success, the command prints:
#   Redirecting to /bin/systemctl start mongod.service
# Running $ top should then show a mongo process.
import pymongo
import datetime
from pymongo import MongoClient
# Connect to the MongoDB server and inspect its databases and collections.
#
# Choosing `host`: it is not always 'localhost'. On the author's machine the
# hostname had been changed to "GIS", and passing that name made
# client.server_info() fail. To find the address that works, start the mongo
# shell with `$ mongo`; it prints the address it connects to, e.g.:
#
#     MongoDB shell version v3.4.10
#     connecting to: mongodb://127.0.0.1:27017
#     MongoDB server version: 3.4.10
#     Welcome to the MongoDB shell.
#
# which shows that host='127.0.0.1' is the value to use when 'localhost' does
# not work. `$ ifconfig` also shows the address in the entry named `lo`.
# An explicit IP string (dotted, e.g. host='10.244.25.180') may be passed too.
client = MongoClient(host='localhost', port=27017)

# NOTE: database_names(), collection_names() and Collection.count() are
# PyMongo 3.x APIs; PyMongo 4.0 removed them in favour of
# list_database_names(), list_collection_names() and count_documents({}).

# Show mongod service information.
client.server_info()

# Shell equivalent: `show dbs`. On the author's system one database is "dc".
client.database_names()

# Shell equivalent: `use dc` -- select database "dc" as the working database.
db = client.dc

# Shell equivalent: `show collections`. On the author's system one collection
# is "dbr001".
db.collection_names()

# Short handle for the dbr001 collection, used by the statements that follow.
cll = db.dbr001

# Number of documents stored in the collection.
cll.count()

# Basic read queries against the collection `cll`.

# Fetch a single document; equivalent to cll.find().limit(1).
cll.find_one()

# Keys of that document, which can then be used to build query filters.
# Unfortunately, the key names cannot be modified from here.
cll.find_one().keys()

# Filter on a key and cap the result at 10 documents. u'abc' is a placeholder:
# the filter value must have the same type as the stored value, so inspect a
# document with cll.find_one() first to see what type u'key1' holds.
cll.find({u'key1': u'abc'}).limit(10)

# Index management example (kept commented out; it modifies db.test):
# from pymongo import IndexModel, ASCENDING, DESCENDING
# index1 = IndexModel([("hello", DESCENDING),
#                      ("world", ASCENDING)], name="hello_world")
# index2 = IndexModel([("goodbye", DESCENDING)])
# db.test.create_indexes([index1, index2])
# db.collection_names()     # [u'pb', u'test']
# db.test.index_information()
# # {u'_id_': {u'key': [(u'_id', 1)], u'ns': u'public_bike.test', u'v': 2},
# #  u'goodbye_-1': {u'key': [(u'goodbye', -1)],
# #                  u'ns': u'public_bike.test', u'v': 2},
# #  u'hello_world': {u'key': [(u'hello', -1), (u'world', 1)],
# #                   u'ns': u'public_bike.test', u'v': 2}}
# db.test.drop()

# Combine conditions with '$and' ('$or' expresses alternatives). For string
# values use '$ne' for "not equal"; equality is the default and needs no
# '$eq'. For numeric values (float, int, ...) use '$gt' / '$gte' / '$lt' /
# '$lte' for greater / greater-or-equal / less / less-or-equal.
# NOTE: Cursor.count() is a PyMongo 3.x API (removed in PyMongo 4.0, where
# cll.count_documents(filter) replaces it).
cll.find({'$and': [{u'key1': {'$ne': u'AB'}},
                   {u'key1': {'$ne': u'ab'}}]}).count()
# When a key holds many repeated values, distinct() returns each value once:
# here, the de-duplicated values stored under 'bikeid'.
a = cll.distinct(u'bikeid')

# The bikeid values are stored as strings, so sorting them directly gives a
# lexicographic order (e.g. '1899' sorts before '2'). Convert to int first and
# sort ascending. The built-in int() is used because numpy is not imported in
# this script and the `np.int` alias no longer exists in current NumPy.
aa = sorted(int(i) for i in a)

# Bulk update examples.

# cll.remove()  # DANGER: permanently deletes the collection's documents

# Set 'x' to 22 in every document where 'x' is 12.
db.test.update_many({u'x': 12}, {"$set": {u'x': 22}})

# In every document where 'y' is 11, rename the key 'y' to 'x'.
result = db.test.update_many({'y': 11}, {'$rename': {'y': 'x'}})

# Practical case from the public bike database: on one computer the key is
# named 'starttime', while on the laptop it is 'start_time'. Renaming
# 'starttime' to 'start_time' lets the same code run against both databases.
# The empty filter {} matches every document; the operator must be spelled
# "$rename" (with the leading '$') to be treated as an update operator.
db.pb.update_many({}, {"$rename": {u'starttime': u'start_time'}})

# Reminder: after `cll = db.test`, cll can be used exactly like db.test.
# Remove a trailing '\r' from the 'birthyear' field of every document in
# db.pb. This issues one update per affected document, so it is slow on
# large collections.
for doc in db.pb.find():
    birthyear = doc[u'birthyear']
    # endswith() is False for an empty string, so no separate length check
    # is needed.
    if birthyear.endswith('\r'):
        db.pb.update_one(
            {u'_id': doc[u'_id']},
            # Keep only the text before the first '\r'.
            {"$set": {u'birthyear': birthyear.split('\r')[0]}},
        )

# Disconnect from MongoDB.
client.close()
                                       



# 评论 (Comments)