Python结果统计案例如何实现?

wen python案例 4

本文目录导读:

  1. 基础数据统计
  2. 考试成绩统计案例
  3. 文本分析统计
  4. 销售数据统计
  5. 实用工具函数
  6. 可视化统计结果

下面为您详细介绍几种常见的Python结果统计案例及其实现方法:

基础数据统计

1.1 统计列表元素出现次数

# 方法1:使用字典统计
def count_elements_basic(data):
    """Count how many times each element occurs, using only a plain dict.

    Args:
        data: Iterable of hashable elements.

    Returns:
        A dict mapping each distinct element to its number of occurrences.
        Keys appear in the order in which elements were first seen.
    """
    tally = {}
    for element in data:
        if element in tally:
            tally[element] += 1
        else:
            # First sighting of this element.
            tally[element] = 1
    return tally
# 方法2:使用collections.Counter
from collections import Counter
def count_elements_counter(data):
    """Count element occurrences with ``collections.Counter``.

    Args:
        data: Iterable of hashable elements (or a mapping of element -> count).

    Returns:
        A ``Counter`` holding the number of occurrences of each element.
    """
    tally = Counter()
    tally.update(data)
    return tally
# Example: tally the same score list with both implementations.
scores = [85, 92, 78, 92, 85, 90, 85, 92, 88, 90]
print("基础统计:", count_elements_basic(scores))
print("Counter统计:", count_elements_counter(scores))
# The Counter shown by the second print is:
# Counter({85: 3, 92: 3, 90: 2, 78: 1, 88: 1})

考试成绩统计案例

def exam_statistics(scores):
    """Compute summary statistics for a collection of exam scores.

    Args:
        scores: Sequence of numeric scores (ints or floats).

    Returns:
        An empty dict when ``scores`` is empty. Otherwise a dict with the
        keys '总分' (sum), '平均分' (mean, rounded to 2 places), '中位数'
        (median), '众数' (mode), '最高分' (max), '最低分' (min),
        '分数段分布' (count of scores per grade band) and '标准差'
        (population standard deviation, rounded to 2 places).

        When several scores tie for most frequent, a single one is reported
        (whichever ``Counter.most_common(1)`` yields).
    """
    from collections import Counter

    n = len(scores)
    if n == 0:
        return {}

    total = sum(scores)
    average = total / n

    # Median: middle element, or the mean of the two middle elements.
    ordered = sorted(scores)
    mid = n // 2
    if n % 2 == 0:
        median = (ordered[mid - 1] + ordered[mid]) / 2
    else:
        median = ordered[mid]

    # Mode: the most frequent score.
    mode = Counter(scores).most_common(1)[0][0]

    # Grade bands are half-open ([80, 90), [70, 80), ...) so fractional
    # scores such as 89.5 land in a band instead of falling into the gap
    # between 89 and 90. For integer scores the counts are unchanged.
    grade_distribution = {
        '优秀(90-100)': sum(1 for s in scores if 90 <= s <= 100),
        '良好(80-89)': sum(1 for s in scores if 80 <= s < 90),
        '中等(70-79)': sum(1 for s in scores if 70 <= s < 80),
        '及格(60-69)': sum(1 for s in scores if 60 <= s < 70),
        '不及格(<60)': sum(1 for s in scores if s < 60),
    }

    # Population standard deviation (divides by n, not n - 1).
    variance = sum((x - average) ** 2 for x in scores) / n
    std_dev = variance ** 0.5

    return {
        '总分': total,
        '平均分': round(average, 2),
        '中位数': median,
        '众数': mode,
        '最高分': max(scores),
        '最低分': min(scores),
        '分数段分布': grade_distribution,
        '标准差': round(std_dev, 2),
    }
# Usage example: compute the statistics and print one "key: value" line per entry.
exam_scores = [78, 92, 85, 60, 95, 88, 72, 80, 90, 55, 85, 92]
result = exam_statistics(exam_scores)
for key, value in result.items():
    print(f"{key}: {value}")

文本分析统计

from collections import Counter
import re
def text_statistics(text):
    """Compute basic statistics for a piece of text.

    Args:
        text: The string to analyse.

    Returns:
        A dict with the keys:
        '总字符数' - length of ``text`` (whitespace included);
        '总单词数' - number of whitespace-separated tokens;
        '总句子数' - number of non-blank segments delimited by ``.``, ``!``
        or ``?``;
        '平均单词长度' - mean length of the regex-extracted words, rounded to
        2 places (0 when there are no words);
        '最常用单词' - up to 10 ``(word, count)`` pairs, most frequent first;
        '词汇量' - number of distinct lower-cased words;
        '字符频率' - the 5 most frequent alphabetic characters (lower-cased)
        mapped to their counts.
    """
    char_count = len(text)
    word_count = len(text.split())

    # re.split leaves an empty trailing piece when the text ends with a
    # terminator (and a single empty piece for empty text), so count only
    # the pieces that actually contain something.
    sentence_count = sum(
        1 for piece in re.split(r'[.!?]+', text) if piece.strip()
    )

    # Word frequencies are case-insensitive.
    words = re.findall(r'\b\w+\b', text.lower())
    word_freq = Counter(words)
    common_words = word_freq.most_common(10)

    # Character frequencies consider alphabetic characters only.
    char_freq = Counter(c.lower() for c in text if c.isalpha())

    if words:
        average_word_length = round(sum(len(w) for w in words) / len(words), 2)
    else:
        average_word_length = 0

    return {
        '总字符数': char_count,
        '总单词数': word_count,
        '总句子数': sentence_count,
        '平均单词长度': average_word_length,
        '最常用单词': common_words,
        '词汇量': len(word_freq),
        '字符频率': dict(char_freq.most_common(5)),
    }
# Example: analyse a short English paragraph and print each statistic.
sample_text = "Python is a powerful programming language. It is widely used in data science and machine learning. Python's simplicity makes it a great choice for beginners."
text_stats = text_statistics(sample_text)
for key, value in text_stats.items():
    print(f"{key}: {value}")

销售数据统计

def sales_statistics(sales_data):
    """Aggregate a list of sales records by product and by month.

    Args:
        sales_data: Sequence of dicts shaped like
            ``{'product': 'A', 'amount': 100, 'date': '2024-01-01'}``.
            The first 7 characters of ``date`` are used as the month key.

    Returns:
        A dict with the keys:
        '总订单数' - number of records;
        '总销售额' - sum of all amounts;
        '平均订单金额' - revenue per record, rounded to 2 places (0 when
        there are no records);
        '产品统计' - per product: '销量' (number of records), '销售额'
        (summed amount) and '平均价格' (amount per record, rounded);
        '月度统计' - summed amount per 'YYYY-MM' month;
        '最畅销产品' - product with the most records, or ``None`` when
        ``sales_data`` is empty;
        '销售额最高产品' - product with the highest summed amount, or
        ``None`` when ``sales_data`` is empty.

        Ties in either ranking go to the product seen first.
    """
    total_sales = len(sales_data)
    total_revenue = sum(item['amount'] for item in sales_data)

    product_stats = {}
    monthly_sales = {}
    for item in sales_data:
        amount = item['amount']

        # Per-product totals; '销量' counts records, not units.
        stats = product_stats.setdefault(
            item['product'],
            {'销量': 0, '销售额': 0, '平均价格': 0},
        )
        stats['销量'] += 1
        stats['销售额'] += amount

        # Per-month totals, keyed by the 'YYYY-MM' prefix of the date.
        month = item['date'][:7]
        monthly_sales[month] = monthly_sales.get(month, 0) + amount

    for stats in product_stats.values():
        stats['平均价格'] = round(stats['销售额'] / stats['销量'], 2)

    # max() raises ValueError on an empty sequence, so rank only when there
    # is at least one product; otherwise report None.
    if product_stats:
        best_selling = max(
            product_stats, key=lambda name: product_stats[name]['销量']
        )
        highest_revenue = max(
            product_stats, key=lambda name: product_stats[name]['销售额']
        )
    else:
        best_selling = None
        highest_revenue = None

    return {
        '总订单数': total_sales,
        '总销售额': total_revenue,
        '平均订单金额': round(total_revenue / total_sales, 2) if total_sales else 0,
        '产品统计': product_stats,
        '月度统计': monthly_sales,
        '最畅销产品': best_selling,
        '销售额最高产品': highest_revenue,
    }
# Example: five orders across three products and three months.
sales_data = [
    {'product': 'A', 'amount': 100, 'date': '2024-01-15'},
    {'product': 'B', 'amount': 200, 'date': '2024-01-20'},
    {'product': 'A', 'amount': 150, 'date': '2024-02-10'},
    {'product': 'C', 'amount': 300, 'date': '2024-02-15'},
    {'product': 'B', 'amount': 180, 'date': '2024-03-05'}
]
sales_result = sales_statistics(sales_data)
# Print each statistic under its own heading; dict-valued statistics are
# expanded one indented line per entry, scalars are printed on a single line.
for key, value in sales_result.items():
    print(f"\n{key}:")
    if isinstance(value, dict):
        for k, v in value.items():
            print(f"  {k}: {v}")
    else:
        print(f"  {value}")

实用工具函数

import numpy as np
import pandas as pd
def advanced_statistics(data):
    """Summarise numeric data with both NumPy and pandas.

    Args:
        data: Sequence of numbers accepted by ``np.array`` and ``pd.Series``.

    Returns:
        A dict with two entries:
        'NumPy统计' - mean, median, standard deviation, variance, min, max
        and the 25th/75th percentiles computed with NumPy functions;
        'Pandas描述统计' - the result of ``pd.Series(data).describe()`` as a
        plain dict.

        Note that ``np.std``/``np.var`` are called with their defaults
        (population statistics, ddof=0), while the 'std' reported by pandas
        ``describe()`` is the sample standard deviation (ddof=1), so the two
        values differ.
    """
    values = np.array(data)

    # Single-argument reducers, listed in the order the keys must appear.
    reducers = (
        ('均值', np.mean),
        ('中位数', np.median),
        ('标准差', np.std),
        ('方差', np.var),
        ('最小值', np.min),
        ('最大值', np.max),
    )
    numpy_summary = {label: reduce_fn(values) for label, reduce_fn in reducers}

    # Quartiles need the extra percentile argument, so they are added separately.
    for label, q in (('25%分位数', 25), ('75%分位数', 75)):
        numpy_summary[label] = np.percentile(values, q)

    pandas_summary = pd.Series(data).describe().to_dict()

    return {
        'NumPy统计': numpy_summary,
        'Pandas描述统计': pandas_summary,
    }
# Example: summarise the integers 1 through 10.
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
print(advanced_statistics(data))

可视化统计结果

import matplotlib.pyplot as plt
def visualize_statistics(data, title="统计结果可视化"):
    """Plot the frequency of each distinct element as a bar chart and a pie chart.

    Args:
        data: Iterable of hashable category values.
        title: Overall figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    from collections import Counter

    # Frequency of each category, in first-seen order.
    tally = Counter(data)
    labels = list(tally)
    counts = list(tally.values())

    # One row, two panels: bar chart on the left, pie chart on the right.
    _, (bar_ax, pie_ax) = plt.subplots(1, 2, figsize=(12, 5))

    bar_ax.bar(labels, counts, color='skyblue')
    bar_ax.set_title('频数分布')
    bar_ax.set_xlabel('类别')
    bar_ax.set_ylabel('频数')

    pie_ax.pie(counts, labels=labels, autopct='%1.1f%%')
    pie_ax.set_title('占比分布')

    plt.suptitle(title)
    plt.tight_layout()
    plt.show()
# Example: categorical data. This rebinds the module-level name `data`
# that the NumPy/pandas example assigned earlier.
data = ['A', 'B', 'A', 'C', 'B', 'A', 'D', 'C', 'A', 'B']
# visualize_statistics(data)  # uncomment to display the chart

这些案例涵盖了Python结果统计的常见应用场景,您可以根据具体需求选择合适的方法,也可以组合使用这些功能来构建更复杂的统计分析系统。

标签: 统计案例

抱歉,评论功能暂时关闭!