数据检索是从结构化或非结构化数据集中快速定位并提取目标信息的过程, 常见于数据分析、数据库管理和机器学习等领域。其核心目标是通过高效的方法减少计算资源消耗, 提升处理速度。
# Sample list (10, 20, ..., 90) used by the index, slice and stride examples.
data = list(range(10, 100, 10))
索引检索
print(data[2])
30
切片检索
print(data[1:5])
[20, 30, 40, 50]
步长检索
print(data[::2])
[10, 30, 50, 70, 90]
2. 字典检索
# Sample employee record used by the dictionary retrieval examples.
employee = dict(
    name="张三",
    age=28,
    department="研发部",
    skills=["Python", "SQL", "机器学习"],
)
直接检索
print(employee["name"])
张三
安全检索
print(employee.get("salary", 8000))
8000
嵌套检索
print(employee["skills"][0])
Python
查找包含关键字的行。
创建临时文件
# Create a small demo log file, then scan it for lines containing "ERROR".
# Both handles name UTF-8 explicitly so the write and the read agree on
# every platform instead of depending on the locale's default encoding.
with open('xx_log.txt', 'w', encoding='utf-8') as f:
    # NOTE: the demo content has no "ERROR" line, so the scan below
    # prints nothing for this file.
    f.write('hello')

with open("xx_log.txt", "r", encoding="utf-8") as file:
    # enumerate(..., 1) yields 1-based line numbers for the report.
    for line_num, line in enumerate(file, 1):
        if "ERROR" in line:
            print(f"第{line_num}行发现错误: {line.strip()}")
2. CSV文件检索
创建xx_employees.csv文件
import csv
import random
def create_employee_csv(filename="xx_employees.csv", count=20):
    """Generate a CSV file of randomly populated employee records.

    Each record has the columns ``name``, ``email``, ``position``,
    ``years_exp`` and ``department``. Names are ``Employee_1`` ..
    ``Employee_<count>``; position, department and years of experience
    (1-15 inclusive) are drawn from the ``random`` module, so the content
    differs between runs unless the caller seeds ``random`` first.

    Args:
        filename: Path of the CSV file to create (overwritten if present).
        count: Number of employee rows to generate. Defaults to 20.
    """
    # Candidate values for the randomly chosen columns.
    positions = ["Junior Developer", "Developer", "Senior Developer", "Team Lead", "Manager"]
    departments = ["Engineering", "Product", "Design", "Marketing", "HR"]
    fieldnames = ["name", "email", "position", "years_exp", "department"]

    # Build the sample employee records.
    employees = []
    for i in range(1, count + 1):
        name = f"Employee_{i}"
        employees.append({
            "name": name,
            "email": f"{name.lower()}@company.com",
            "position": random.choice(positions),
            "years_exp": random.randint(1, 15),
            "department": random.choice(departments),
        })

    # newline="" lets the csv module control line endings; the explicit
    # encoding keeps the file identical regardless of the platform default.
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(employees)

    print(f"已创建员工数据文件: {filename}")
# 创建文件
create_employee_csv()
已创建员工数据文件: xx_employees.csv
筛选结果 - 经验超过5年的高级开发人员:
Employee_1 - employee_1@company.com
Employee_11 - employee_11@company.com
Employee_15 - employee_15@company.com
检索特定条件的记录。
# Load the employee CSV and keep the senior developers with more than
# five years of experience.
# newline="" is what the csv module expects for files handed to a reader;
# the explicit encoding avoids depending on the platform default.
with open("xx_employees.csv", "r", newline="", encoding="utf-8") as file:
    reader = csv.DictReader(file)
    # The reader is lazy, so the filter has to run while the file is open.
    senior_devs = [
        row
        for row in reader
        if row["position"] == "Senior Developer" and int(row["years_exp"]) > 5
    ]

for dev in senior_devs:
    print(f"{dev['name']} - {dev['email']}")
Employee_1 - employee_1@company.com
Employee_11 - employee_11@company.com
Employee_15 - employee_15@company.com
import sqlite3
def search_products(min_price, max_price, db_path="/data/demo/products.db"):
    """Return products whose price lies in ``[min_price, max_price]``.

    Args:
        min_price: Lower price bound (inclusive, per SQL ``BETWEEN``).
        max_price: Upper price bound (inclusive, per SQL ``BETWEEN``).
        db_path: Path of the SQLite database file holding the ``products``
            table. Defaults to the demo database.

    Returns:
        A list of ``(name, price, category)`` tuples ordered by price,
        highest first. Empty when nothing matches.
    """
    query = """
        SELECT name, price, category
        FROM products
        WHERE price BETWEEN ? AND ?
        ORDER BY price DESC
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # Bounds are bound as parameters, never interpolated into the SQL.
        cursor.execute(query, (min_price, max_price))
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()
2. MySQL/PostgreSQL检索
对于PostgreSQL
import psycopg2
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[34], line 1
----> 1 import psycopg2

ModuleNotFoundError: No module named 'psycopg2'
对于MySQL（注意：下面的示例函数使用 psycopg2 连接 PostgreSQL；MySQL 的查询写法类似，改用 mysql.connector.connect 建立连接即可）
import mysql.connector
def search_customers(region, min_orders):
    """Return customers in *region* that have at least *min_orders* orders.

    Connects to the ``sales`` PostgreSQL database through psycopg2, joins
    ``customers`` with ``orders`` and counts orders per customer.

    Args:
        region: Value matched against the ``region`` column.
        min_orders: Minimum number of orders a customer must have.

    Returns:
        A list of ``(customer_id, customer_name, order_count)`` rows.
    """
    # NOTE(review): connection settings and credentials are hard-coded for
    # the demo; load them from configuration or the environment in real use.
    conn = psycopg2.connect(
        host="localhost",
        database="sales",
        user="admin",
        password="secret"
    )
    # customer_id exists in both joined tables, so it is qualified with the
    # customers alias to avoid an ambiguous-column error.
    query = """
        SELECT c.customer_id, customer_name, COUNT(order_id) as order_count
        FROM customers c
        JOIN orders o ON c.customer_id = o.customer_id
        WHERE region = %s
        GROUP BY c.customer_id, customer_name
        HAVING COUNT(order_id) >= %s
    """
    try:
        # The cursor context manager closes only the cursor, not the
        # connection, so the connection is closed in the finally clause.
        with conn.cursor() as cursor:
            cursor.execute(query, (region, min_orders))
            return cursor.fetchall()
    finally:
        conn.close()