logo

Python 数据校验

王哲峰 / 2022-07-25


目录

Python 数据校验、类型提示库:

validators

安装 validators

$ pip install validators

基础的 validators

validators 中每一个 validator 是一个简单的函数, 函数参数为要验证的值, 一些函数可能有额外的关键字参数. 对于每一个函数, 如果验证成功, 则返回 True; 若验证失败, 则返回一个 ValidationFailure 对象.

  1. validators.between(value, min = None, max = None)
  2. validators.domain(value)
  3. validators.email(value, whitelist = None)
  4. validators.iban(value)
  5. validators.ip_address.ipv4(value)
  6. validators.ip_address.ipv6(value)
  7. validators.length(value, min = None, max = None)
  8. validators.mac_address(value)
  9. validators.slug(value)
  10. validators.truthy(value)
  11. validators.url(value, public = False)
  12. validators.i18n.fi.fi_business_id(business_id)
  13. validators.i18n.fi.fi_ssn(ssn)

装饰器、自定义验证函数

validators.utils.validator(func, *args, **kwargs)
validators.utils.ValidationFailure(func, args)
# NOTE: `validator` is the decorator listed as validators.utils.validator;
# it must be in scope for these definitions. A validator in this library
# returns True on success and a ValidationFailure object on failure.
@validator
def is_even(value):
    """Check that ``value`` leaves no remainder when divided by 2."""
    return not (value % 2)


@validator
def is_positive(value):
    """Check that ``value`` is strictly greater than zero."""
    return value > 0


@validator
def is_string(value):
    """Check that ``value`` is an instance of ``str``."""
    return isinstance(value, str)


if __name__ == "__main__":
    # print() is a function on Python 3; the bare `print x` statement form
    # is a SyntaxError there.
    print(is_even(2))
    print(is_even(3))
    print(is_positive(4))
    print(is_positive(0))
    print(is_positive(-1))
    print(is_string("hello"))
    print(is_string(3))

validator

安装 validator

$ pip install validator.py

示例

from validator import validate
from validator import Required, Not, Truthy, Blank, Range, Equals, In

# Rule set: each key maps to the list of checks applied to the value stored
# under that key in the dictionary being validated.
rules = {
    "foo": [Required, Equals(123)],
    "bar": [Required, Truthy()],
    "baz": [In(["spam", "eggs", "bacon"])],
    "qux": [Not(Range(1, 100))]
}
# Sample input named `passes` — presumably meant to satisfy every rule above.
passes = {
    "foo": 123,
    "bar": True,
    "baz": "spam",
    "qux": 101,
}
# The return value of validate() is neither captured nor printed here.
validate(rules, passes)

# Sample input named `fails` — presumably meant to violate every rule above.
fails = {
    "foo": 321,
    "bar": False,
    "baz": "barf",
    "qux": 99
}
validate(rules, fails)

validator 内置验证器

条件验证

# If / Then are used below and need to be imported alongside the other rules.
from validator import validate, If, Then, Equals, In

# Record to validate: which name rules apply depends on its "type".
pet = {
    "name": "whiskers",
    "type": "cat",
}

# In() takes a single collection of allowed values (as in
# In(["spam", "eggs", "bacon"])), not one positional argument per value.
cat_name_rules = {
    "name": [In(["whiskers", "fuzzy", "tiger"])]
}
dog_name_rules = {
    "name": [In(["spot", "ace", "bandit"])]
}
# Conditional validation: when "type" equals "cat" the cat name rules are
# applied, when it equals "dog" the dog name rules are applied.
validation = {
    "type": [
        If(Equals("cat"), Then(cat_name_rules)),
        If(Equals("dog"), Then(dog_name_rules))
    ]
}
validate(validation, pet)

嵌套验证

# Nested validation: a dict placed inside a rule list acts as a rule set for
# the nested dictionary stored under that key (here "bar", then "bar.qux").
validator = {
    "foo": [Required, Equals(1)],
    "bar": [
        Required,
        {
            "baz": [],
            "qux": [
                Required,
                {"quux": [Required, Equals(3)]},
            ],
        },
    ],
}
# Input whose nesting mirrors the rule structure above.
test_case = {
    "foo": 1,
    "bar": {
        "baz": 2,
        "qux": {"quux": 3},
    },
}
validate(validator, test_case)

自定义 validator 验证器

# Input to validate.
dictionary = {
    "foo": "bar"
}
# A plain callable is used as the rule: the lambda compares the value it is
# given with "bar". Presumably a truthy return means the rule passed —
# confirm against the validator.py documentation.
validation = {
    "foo": [lambda x: x == "bar"]
}
validate(validation, dictionary)

voluptuous

安装 voluptuous

$ pip install voluptuous

voluptuous 字典数据验证

验证数据类型

  1. 先定义一个 schema
# NOTE(review): traceback is not used anywhere in the visible snippets.
import traceback
from voluptuous import Schema, MultipleInvalid

# Schema mapping each expected key to the type its value must have.
schema = Schema({
    "q": str,
    "per_page": int,
    "page": int,
})
  2. 待验证数据
# Data to validate: one str value and two int values, matching the schema's
# three keys.
data = {
    "q": "hello world",
    "per_page": 20,
    "page": 10,
}
  3. 验证数据
# Calling the schema object runs the validation; a MultipleInvalid raised by
# the call is caught and its .errors attribute printed.
try:
    schema(data)
except MultipleInvalid as e:
    print(e.errors)

验证必须字段

from voluptuous import Schema, MultipleInvalid

# First variant: all three keys are declared as plain (unwrapped) keys.
schema = Schema({
    "q": str,
    "per_page": int,
    "page": int,
})
# "per_page" is left out of the data on purpose.
data = {
    "q": "hello world",
    "page": 10
}
# Called without try/except — presumably plain keys are not mandatory by
# default, so the missing "per_page" goes unreported; confirm.
schema(data)
from voluptuous import Schema, Required, MultipleInvalid

# Second variant: "per_page" is wrapped in Required(...).
schema = Schema({
    "q": str,
    Required("per_page"): int,
    "page": int,
})

# Same data as before: "per_page" is still missing.
data = {
    "q": "hello world",
    "page": 10,
}

# This time the call is guarded, and any MultipleInvalid is reported through
# its .errors attribute.
try:
    schema(data)
except MultipleInvalid as e:
    print(e.errors)

验证数据长度、数据值范围

# NOTE(review): Schema is not imported in this snippet; it relies on an
# earlier `from voluptuous import Schema`.
from voluptuous import Required, All, Length, Range

# All(...) combines a type check with a Length / Range constraint for the
# same value; "per_page" additionally declares default = 5.
schema = Schema({
    Required("q"): All(str, Length(min = 1)),
    Required("per_page", default = 5): All(int, Range(min = 1, max = 20)),
    "page": All(int, Range(min = 0)),
})

voluptuous 验证其他类型数据

字面值(Literals)

from voluptuous import Schema, MultipleInvalid

# A literal used as the schema: the value passed in is checked against it.
schema = Schema(1)
# success
schema(1)
# error
# Guarded like the other error examples, so the expected failure is reported
# instead of stopping the script; the AssertionError flags the case where no
# error is raised at all.
try:
    schema(2)
    raise AssertionError("MultipleInvalid not raised")
except MultipleInvalid as e:
    print(e.errors)

schema = Schema("a string")
# success
schema("a string")

类型(types)

from voluptuous import Schema, MultipleInvalid

# A type used as the schema: the value passed in must be of that type.
schema = Schema(int)

# success
schema(1)

# error
# Guarded like the other error examples, so the expected failure is reported
# instead of stopping the script; the AssertionError flags the case where no
# error is raised at all.
try:
    schema("one")
    raise AssertionError("MultipleInvalid not raised")
except MultipleInvalid as e:
    print(e.errors)

URLs

from voluptuous import Schema, Url, MultipleInvalid

# Url() used as the schema for a single string value.
schema = Schema(Url())

# success
schema("http://w3.org")

# error
# The except clause names the exception directly: `except: X as e:` (with a
# colon after `except`) is a SyntaxError.
try:
    schema("one")
    raise AssertionError("MultipleInvalid not raised")
except MultipleInvalid as e:
    print(e.errors)

Lists

from voluptuous import Schema

# A list schema enumerating candidate values; the calls below pass lists
# built only from those values, in differing lengths and orders.
schema = Schema([1, "a", "string"])
schema([1])
schema([1, 1, 1])
schema(["a", 1, "string", 1, "string"])
from voluptuous import Schema

# The built-in `list` type as the schema; an empty and a non-empty list are
# both passed to it.
schema = Schema(list)
schema([])
schema([1, 2])
from voluptuous import Schema

# An empty-list schema.
# NOTE(review): MultipleInvalid is not imported in this snippet; it relies on
# an earlier `from voluptuous import ... MultipleInvalid`.
schema = Schema([])

# error
try:
    schema([1])
    raise AssertionError("MultipleInvalid not raised")
except MultipleInvalid as e:
    print(e.errors)

# success
schema([])

自定义函数

from datetime import datetime

def Date(fmt = "%Y-%m-%d"):
    """Build a callable that parses a string according to ``fmt``.

    The returned callable passes its argument and ``fmt`` to
    ``datetime.strptime`` and returns the resulting ``datetime``.
    """
    def parse(value):
        return datetime.strptime(value, fmt)

    return parse

# The callable returned by Date() is used as the whole schema.
schema = Schema(Date())
schema("2013-03-03")
# "2013-03" lacks the day part of the default "%Y-%m-%d" format, so this call
# is expected to raise MultipleInvalid; the AssertionError flags the case
# where it does not.
try:
    schema("2013-03")
    raise AssertionError("MultipleInvalid not raised")
except MultipleInvalid as e:
    print(e.errors)

字典

# Dictionary schema: literal keys mapped to literal values.
schema = Schema({
    1: "one",
    2: "two",
})
# success

schema({1: "one"})
from voluptuous import ALLOW_EXTRA

# extra = ALLOW_EXTRA: the input below carries key 1, which the schema does
# not declare, and the call is marked as a success.
schema = Schema({2: 3}, extra = ALLOW_EXTRA)

# success
schema({1: 2, 2: 3})
from voluptuous import Schema, REMOVE_EXTRA

# extra = REMOVE_EXTRA: same input, also marked as a success. Presumably the
# undeclared key is dropped from the returned data — confirm.
schema = Schema({2: 3}, extra = REMOVE_EXTRA)

# success
schema({1: 2, 2: 3})
# No `required` argument, and only one of the two declared keys is supplied.
# The call carries no success/error marker; presumably it passes — confirm.
schema = Schema({1: 2, 3: 4})
schema({3: 4})
from voluptuous import Schema

# required = True is set on the schema as a whole.
# NOTE(review): MultipleInvalid is not imported in this snippet; it relies on
# an earlier `from voluptuous import ... MultipleInvalid`.
schema = Schema({1: 2, 3: 4}, required = True)

# error
# Key 1 is missing from the input; the AssertionError flags the case where
# no MultipleInvalid is raised.
try:
    schema({3: 4})
    raise AssertionError("MultipleInvalid not raised")
except MultipleInvalid as e:
    print(e.errors)

# success
schema({1: 2, 3: 4})
from voluptuous import Schema, Required

# Only key 1 is wrapped in Required(...); key 3 is a plain key.
schema = Schema({
    Required(1): 2, 
    3: 4
})

# error
# The Required key 1 is missing from the input.
try:
    schema({3: 4})
    raise AssertionError("MultipleInvalid not raised")
except MultipleInvalid as e:
    print(e.errors)

# success
# Key 3 is omitted here and the call is still marked as a success.
schema({1: 2})
from voluptuous import Schema, Optional, MultipleInvalid

# required = True applies to the schema as a whole; key 3 is wrapped in
# Optional(...) so it may be left out.
schema = Schema({
    1: 2,
    Optional(3): 4,
}, required = True)

# error
# Key 1 is missing. The AssertionError sentinel matches the other error
# examples: without it, a call that unexpectedly succeeds would go unnoticed.
try:
    schema({})
    raise AssertionError("MultipleInvalid not raised")
except MultipleInvalid as e:
    print(e.errors)

# success
schema({1: 2})

# error
# Key 4 is not declared in the schema.
try:
    schema({1: 2, 4: 5})
    raise AssertionError("MultipleInvalid not raised")
except MultipleInvalid as e:
    print(e.errors)

# success
schema({1: 2, 3: 4})