9

I am trying to validate inserted documents against a schema, and I am looking for a way to perform that validation.

There are libraries like MongoEngine that claim to do this, but is there a way to do document validation directly via pymongo?

omrakhur
  • 1,362
  • 2
  • 24
  • 48

4 Answers

20

The python driver docs are indeed a little light on how to use the db.command. Here is a complete working example:

    # Demonstrates server-side document validation with PyMongo: a $jsonSchema
    # validator is attached to an existing collection via the collMod command,
    # then one invalid and one valid insert are attempted.
    from collections import OrderedDict

    from pymongo import MongoClient
    from pymongo.errors import PyMongoError, WriteError

    client = MongoClient()   # supply connection args as appropriate
    db = client.testX

    # Start from a clean, explicitly created collection so collMod has a target.
    db.myColl.drop()
    db.create_collection("myColl")  # Force create!

    #  $jsonSchema expression type is preferred.  New since v3.6 (2017):
    vexpr = {"$jsonSchema":
      {
             "bsonType": "object",
             "required": [ "name", "year", "major", "gpa" ],
             "properties": {
                "name": {
                   "bsonType": "string",
                   "description": "must be a string and is required"
                },
                "gender": {
                   "bsonType": "string",
                   "description": "must be a string and is not required"
                },
                "year": {
                   "bsonType": "int",
                   "minimum": 2017,
                   "maximum": 3017,
                   "exclusiveMaximum": False,
                   "description": "must be an integer in [ 2017, 3017 ] and is required"
                },
                "major": {
                   "enum": [ "Math", "English", "Computer Science", "History", None ],
                   "description": "can only be one of the enum values and is required"
                },
                "gpa": {
                   # In case you might want to allow doubles OR int, then add
                   # "int" to the bsonType array below:
                   "bsonType": [ "double" ],
                   "minimum": 0,
                   "description": "must be a double and is required"
                }
             }
      }
    }

    # Per the docs, args to command() require that the first key/value pair
    # be the command string and its principal argument, followed by other
    # arguments.  There are two ways to do this:  Using an OrderedDict:
    cmd = OrderedDict([('collMod', 'myColl'),
                       ('validator', vexpr),
                       ('validationLevel', 'moderate')])
    db.command(cmd)

    # Or, use the kwargs construct:
    # db.command('collMod','myColl', validator=vexpr, validationLevel='moderate')

    # This document lacks every required field, so the server should reject it.
    # Only WriteError is caught so that unrelated failures (e.g. connection
    # problems) are not mistaken for the expected validation failure.
    try:
        db.myColl.insert_one({"x": 1})
        print("NOT good; the insert above should have failed.")
    except WriteError as exc:
        print("OK. Expected exception:", exc)

    # This document satisfies the schema, so the insert should succeed.
    try:
        okdoc = {"name": "buzz", "year": 2019, "major": "Math", "gpa": 3.8}
        db.myColl.insert_one(okdoc)
        print("All good.")
    except PyMongoError as exc:
        print("exc:", exc)
Buzz Moschetti
  • 7,057
  • 3
  • 23
  • 33
1

MongoDB supports document validation at the engine level so you'll pick it up via pymongo. You declare your "schema" (rules actually) to the engine. Here's a great place to start: https://docs.mongodb.com/manual/core/document-validation/

Buzz Moschetti
  • 7,057
  • 3
  • 23
  • 33
1

You can put your document validation schema in a separate JSON file, like this:

    {
      "collMod": "users",
      "validator": {
        "$jsonSchema": {
          "bsonType": "object",
          "required": ["email", "password","name"],
          "properties": {
            "email": {
              "bsonType": "string",
              "description": "Correo Electrónico"
            },
            "password": {
              "bsonType": "string",
              "description": "Una representación Hash de la contraseña"
            },
            "name": {
              "bsonType": "object",
              "required": ["first", "last"],
              "description": "Objeto que separa los nombres y apellidos",
              "properties":  {
                "first": {
                  "bsonType": "string",
                  "description": "Primer y segundo nombre"
                },
                "last": {
                  "bsonType": "string",
                  "description": "Primer y segundo apellido"
                }
              }
            }
          }
        }
      }
    }

Then you can load it in a Python script, for example:

# Loads a collMod command document from a JSON file and sends it to MongoDB.
import json  # parse the JSON file
from collections import OrderedDict  # keeps the command name as the first key

from pymongo import MongoClient

client = MongoClient("your_mongo_uri")
db = client.your_db_name

# The example schema has non-ASCII descriptions, so decode the file explicitly
# as UTF-8 instead of relying on the platform default encoding.
# object_pairs_hook builds every JSON object as an OrderedDict while parsing,
# so "collMod" stays the first key, as db.command() requires. Wrapping an
# already-parsed plain dict in OrderedDict would be too late on Python
# versions whose dicts do not preserve insertion order (before 3.7).
with open('your_schema_file.json', 'r', encoding='utf-8') as j:
    d = json.load(j, object_pairs_hook=OrderedDict)

db.command(d)

OrderedDict Info

collMod Info

Schema Validation Info

0

I know of two options for dealing with this:

  1. Create the collection with a schema (or set one on an existing collection), so that every insertion is checked against it on the server side and is either rejected or logged as a warning, depending on validationAction. The following code demonstrates schema creation and testing:
# Creates a "users" collection with a $jsonSchema validator and shows one
# insert that passes validation and one that the server rejects.
import pymongo

# Every `...` below is a placeholder to be replaced with real settings.
# The server address is passed as `host`; MongoClient has no `url` parameter.
mongo_client = pymongo.MongoClient(host=...,
                                   port=...,
                                   username=...,
                                   password=...,
                                   authSource=...,
                                   authMechanism=...,
                                   connect=True, )
# Query the server once so that connection problems surface here rather than
# on the first real operation.
mongo_client.server_info()
db = mongo_client.your_db
users = db.create_collection(name="users",
                             validator={"$jsonSchema": {
                                 "bsonType": "object",
                                 "required": ["username"],
                                 "properties": {
                                     "username": {
                                         "bsonType": "string",
                                         # Anchored with ^...$ so the whole value must match;
                                         # unanchored, any string merely containing 5
                                         # lowercase letters/digits would be accepted.
                                         "pattern": "^[a-z0-9]{5,15}$",
                                         "description": "user name (required), only lowercase letters "
                                                        "and digits allowed, from 5 to 15 characters long"
                                     },
                                     "email": {
                                         "bsonType": "string",
                                         "description": "User's email (optional)"
                                     },
                                 }
                             }},
                             validationAction="error",
                             )
# Inserting user document that fits the schema
users.insert_one({"username": "admin", "email": "some_admin_mail"})
# Insertion below will be rejected with "pymongo.errors.WriteError: Document failed validation, full error"
# caused by too short username (root)
users.insert_one({"username": "root", "email": "some_root_mail"})

  2. You can treat your Mongo documents as ordinary JSON entities and check them on the client side using standard JSON Schema validation:
# Validates documents on the client with the jsonschema package before they
# are sent to MongoDB.
from jsonschema import validate
from jsonschema.exceptions import ValidationError
from pymongo import MongoClient

db = MongoClient(...).your_db  # `...` is a placeholder for connection settings
schema = {
    "type": "object",
    "required": ["username"],
    "properties": {
        # Anchored with ^...$ so the whole value must match; an unanchored
        # pattern only needs to match somewhere inside the string.
        "username": {"type": "string", "pattern": "^[a-z0-9]{5,15}$"},
        "email": {"type": "string"},
    },
}
try:
    new_user = {"username": "admin", "email": "some_admin_mail"}
    # No exception will be raised in validation below
    validate(instance=new_user, schema=schema)
    db.users.insert_one(new_user)

    new_user = {"username": "root", "email": "some_root_mail"}
    # Exception <ValidationError: 'root' does not match '^[a-z0-9]{5,15}$'> will be raised
    validate(instance=new_user, schema=schema)
    db.users.insert_one(new_user)
except ValidationError as exc:
    # Handle the rejected document here; the failed insert is never attempted
    # because validate() raised first.
    print("Document failed validation:", exc.message)
em2er
  • 811
  • 5
  • 15