题目:https://blog.csdn.net/qq_33254766/article/details/135070656
找到文本中的所有日期
# Collect every YYYY-MM-DD date that appears in the sentence.
text = "重要日期:2023-12-25, 2024-01-01"
dates = [hit.group() for hit in re.finditer(r'\d{4}-\d{2}-\d{2}', text)]
assert dates == ["2023-12-25", "2024-01-01"]
验证电子邮件地址
# Validate an e-mail address.
# fullmatch() is used rather than match() with '^...$': '$' also matches just
# before a trailing newline, so "example@test.com\n" would wrongly validate.
EMAIL_PATTERN = re.compile(r'[\w\.-]+@[\w\.-]+\.\w+')
email = "example@test.com"
is_valid = EMAIL_PATTERN.fullmatch(email) is not None
assert is_valid is True
分割日志文件
# Break a multi-line log string into one entry per line.
log = "INFO:2023-12-17:This is log 1\nERROR:2023-12-18:This is log 2"
entries = re.compile(r'\n').split(log)
assert entries == ["INFO:2023-12-17:This is log 1", "ERROR:2023-12-18:This is log 2"]
匹配电话号码
# Validate a phone number of the form "(123) 456-7890".
# The whole string must match: with re.match() and no end anchor, any input
# that merely starts with a valid number (e.g. extra trailing digits) passed.
PHONE_PATTERN = re.compile(r'\(\d{3}\)\s\d{3}-\d{4}')
phone = "(123) 456-7890"
is_phone = PHONE_PATTERN.fullmatch(phone) is not None
assert is_phone is True
提取URL
# Pull every http/https URL out of the sentence; a URL runs until whitespace.
text = "Visit https://example.com and http://test.org"
urls = re.compile(r'https?://\S+').findall(text)
assert urls == ["https://example.com", "http://test.org"]
验证密码强度
# Check password strength: at least 8 characters drawn only from letters,
# digits and @$!%*?&, with at least one lowercase letter, one uppercase
# letter, one digit and one of those symbols (enforced by the lookaheads).
# fullmatch() is required: with re.match() and no end anchor, characters
# outside the allowed set were accepted after the first 8 valid ones.
PASSWORD_PATTERN = re.compile(
    r'(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}'
)
password = "Aa1!aa11"
is_strong = PASSWORD_PATTERN.fullmatch(password) is not None
assert is_strong is True
寻找重复的单词
# Locate the first word that is immediately repeated; the backreference \1
# must equal the word captured by the first group.
text = "This is a test test text"
duplicate = re.compile(r'\b(\w+)\s+\1\b').search(text)
assert duplicate.group() == "test test"
提取括号内的内容
# Extract the text found between each pair of parentheses (non-greedy).
text = "This is a (sample) text"
content = [hit.group(1) for hit in re.finditer(r'\((.*?)\)', text)]
assert content == ["sample"]
识别货币金额
# Find currency amounts: a $ or € sign, digits, and optional two-digit cents.
text = "The price is $100.00 or €50"
amounts = re.compile(r'[$€]\d+(?:\.\d{2})?').findall(text)
assert amounts == ["$100.00", "€50"]
识别时间格式
# Find clock times written as H:MM or HH:MM with an optional :SS part.
text = "The current time is 14:30:15 or sometimes 09:45"
times = [hit.group() for hit in re.finditer(r'\b\d{1,2}:\d{2}(?::\d{2})?\b', text)]
assert times == ["14:30:15", "09:45"]
验证车牌号
# Validate a licence plate: three uppercase letters, a dash, four digits.
# fullmatch() replaces match() with '^...$' because '$' also matches just
# before a trailing newline, which let "ABC-1234\n" through.
PLATE_PATTERN = re.compile(r'[A-Z]{3}-\d{4}')
plate = "ABC-1234"
is_plate = PLATE_PATTERN.fullmatch(plate) is not None
assert is_plate is True
匹配IPv4地址
# Validate a dotted-quad IPv4 address.
# Each octet is restricted to 0-255; the previous pattern (\d{1,3}) accepted
# out-of-range values such as "999.999.999.999". Octets with a leading zero
# (e.g. "01") are rejected. fullmatch() also rejects a trailing newline.
IPV4_OCTET = r'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])'
IPV4_PATTERN = re.compile(IPV4_OCTET + r'(?:\.' + IPV4_OCTET + r'){3}')
ip = "192.168.1.1"
is_ipv4 = IPV4_PATTERN.fullmatch(ip) is not None
assert is_ipv4 is True
提取HTML标签内容
# Extract the text enclosed by each <div>...</div> pair (non-greedy).
html = "<div>Hello World!</div>"
content = re.compile(r'<div>(.*?)</div>').findall(html)
assert content == ["Hello World!"]
匹配Markdown链接
# Extract (label, url) pairs from Markdown links of the form [label](url).
# The URL part stops at whitespace or ')': the previous greedy \S+ ran past
# the closing parenthesis, merging adjacent links or swallowing a trailing
# ')' from surrounding text. URLs that themselves contain ')' are truncated.
MARKDOWN_LINK_PATTERN = re.compile(r'\[([^\]]+)\]\((https?://[^\s)]+)\)')
markdown = "This is a [link](http://example.com)"
link_text = MARKDOWN_LINK_PATTERN.findall(markdown)
assert link_text == [("link", "http://example.com")]
识别代码注释
# Find C-style comments: "//" line comments and "/* ... */" block comments.
# MULTILINE lets '$' end a line comment at the end of its own line; DOTALL
# lets a block comment span several lines. Both quantifiers are non-greedy so
# each comment stops at its first terminator.
code = "// This is a comment\nint x = 0; /* block comment */"
comments = re.findall(r'//.*?$|/\*.*?\*/', code, re.DOTALL | re.MULTILINE)
assert comments == ["// This is a comment", "/* block comment */"]
匹配科学计数法数字
# Validate a number written in scientific notation, e.g. "1.23e10".
# Accepts an optional sign on mantissa and exponent and either 'e' or 'E'.
# fullmatch() replaces match() with '^...$' because '$' also matches just
# before a trailing newline, which let "1.23e10\n" through.
SCIENTIFIC_PATTERN = re.compile(r'[-+]?\d+(\.\d+)?[eE][-+]?\d+')
number = "1.23e10"
is_scientific = SCIENTIFIC_PATTERN.fullmatch(number) is not None
assert is_scientific is True
解析复杂日志格式
# Parse a "[timestamp, LEVEL] message" log line into its three fields.
log = "[2023-12-17 10:00:00, INFO] This is an information."
pattern = r'\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}), (\w+)\] (.+)'
match = re.compile(pattern).search(log)
assert match.groups() == ("2023-12-17 10:00:00", "INFO", "This is an information.")
匹配嵌套括号内容
# Match a parenthesised span that may contain one level of nested parentheses.
# The inner group must be non-capturing: re.findall() returns the group's
# contents when a pattern has a capturing group, so the previous pattern
# yielded only the last repeated capture ("t") instead of the whole span.
text = "This is a (nested (example) text) string"
nested_content = re.findall(r'\((?:[^()]|\([^()]*\))*\)', text)
assert nested_content == ["(nested (example) text)"]
验证XML/HTML标签结构
# Check that the string starts with an opening tag whose name is repeated by
# a matching closing tag (the backreference \1 enforces the same name).
html = "<tag>content</tag>"
is_valid_html = re.match(r'<([a-z]+)>(.*?)</\1>', html) is not None
assert is_valid_html is True
提取嵌入式语言元素
# Extract the CSS embedded in <style>...</style>; DOTALL lets the body span
# multiple lines.
html = "<style>body { background-color: #fff; }</style>"
style_content = [
    hit.group(1)
    for hit in re.finditer(r'<style>(.*?)</style>', html, re.DOTALL)
]
assert style_content == ["body { background-color: #fff; }"]
这些题目旨在通过实践提高对Python中正则表达式的理解和应用。每个题目都包含了一个具体的例子和一个assert语句,以便验证你的正则表达式是否正确。