# Linux下如何使用openpyxl處理Excel表格數據
## 一、前言
在當今數據驅動的時代,Excel表格作為最常用的數據存儲和處理工具之一,幾乎滲透到各行各業的工作流程中。對于Linux系統用戶而言,雖然原生不支持Microsoft Office,但通過Python生態中的強大工具如openpyxl,我們同樣能夠高效地處理Excel文件。本文將全面介紹在Linux環境下使用openpyxl庫進行Excel數據操作的完整指南。
## 二、環境準備
### 1. Python環境配置
```bash
# 檢查Python版本(推薦3.6+)
python3 --version
# 安裝pip(如未安裝)
sudo apt-get install python3-pip
pip install openpyxl
import openpyxl
print(openpyxl.__version__) # 應輸出類似3.0.10的版本號
from openpyxl import Workbook
wb = Workbook() # 創建空白工作簿
ws = wb.active # 獲取活動工作表
# 設置工作表標題
ws.title = "員工數據"
# 保存文件
wb.save("new_workbook.xlsx")
wb = openpyxl.load_workbook("existing.xlsx")
print(wb.sheetnames) # 打印所有工作表名稱
# 創建工作表
ws1 = wb.create_sheet("月度報表")
# 復制工作表
ws2 = wb.copy_worksheet(ws1)
# 刪除工作表
wb.remove(ws2)
# 寫入數據
ws['A1'] = "姓名"
ws.cell(row=2, column=1, value="張三")
# 讀取數據
print(ws['A1'].value)
print(ws.cell(row=2, column=1).value)
# 單元格格式設置
from openpyxl.styles import Font, Alignment
ws['A1'].font = Font(bold=True, size=14)
ws['A1'].alignment = Alignment(horizontal='center')
# 按行寫入
data_rows = [
["ID", "姓名", "部門"],
[1, "張三", "研發部"],
[2, "李四", "市場部"]
]
for row in data_rows:
ws.append(row)
# 按區域讀取
for row in ws.iter_rows(min_row=1, max_col=3, max_row=3):
print([cell.value for cell in row])
ws['D2'] = "=B2+C2" # 簡單公式
ws['D3'] = "=SUM(B2:C3)" # 函數公式
# 保留公式但禁用計算
wb = openpyxl.load_workbook("formulas.xlsx", data_only=False)
# 自動篩選
ws.auto_filter.ref = "A1:C10"
# 自定義排序(需借助pandas)
import pandas as pd
df = pd.DataFrame(ws.values)
df_sorted = df.sort_values(by=1) # 按第二列排序
from openpyxl.chart import BarChart, Reference
chart = BarChart()
data = Reference(ws, min_col=2, min_row=1, max_col=3, max_row=5)
categories = Reference(ws, min_col=1, min_row=2, max_row=5)
chart.add_data(data, titles_from_data=True)
chart.set_categories(categories)
ws.add_chart(chart, "E10")
from openpyxl.formatting.rule import ColorScaleRule
color_scale_rule = ColorScaleRule(
start_type='min', start_color='FF0000',
end_type='max', end_color='00FF00'
)
ws.conditional_formatting.add("B2:B10", color_scale_rule)
wb = openpyxl.load_workbook("large_file.xlsx", read_only=True)
for row in ws.iter_rows():
process_data(row)
wb.close()
wb = Workbook(write_only=True)
ws = wb.create_sheet()
for row in large_dataset:
ws.append(row)
wb.save("big_file.xlsx")
# 使用內存優化
from openpyxl.utils import get_column_letter
def optimize_memory_usage(ws):
for col in ws.columns:
max_length = 0
column = col[0].column_letter
for cell in col:
try:
if len(str(cell.value)) > max_length:
max_length = len(str(cell.value))
except:
pass
adjusted_width = (max_length + 2)
ws.column_dimensions[column].width = adjusted_width
def clean_excel_data(input_path, output_path):
wb = openpyxl.load_workbook(input_path)
ws = wb.active
# 刪除空行
for row in reversed(range(1, ws.max_row + 1)):
if all(cell.value is None for cell in ws[row]):
ws.delete_rows(row)
# 標準化日期格式
for cell in ws['C']:
if isinstance(cell.value, str):
# 日期格式轉換邏輯
pass
wb.save(output_path)
def generate_monthly_report(data_source, template_path):
# 加載模板
wb = openpyxl.load_workbook(template_path)
report_ws = wb["Report"]
# 填充數據
for idx, item in enumerate(data_source, start=2):
report_ws.cell(row=idx, column=1, value=item['name'])
report_ws.cell(row=idx, column=2, value=item['sales'])
# 添加匯總
report_ws['D2'] = f"=SUM(B2:B{len(data_source)+1})"
# 生成時間戳
from datetime import datetime
report_ws['A1'] = f"報表生成時間: {datetime.now().strftime('%Y-%m-%d %H:%M')}"
return wb
# 處理CSV導入時的編碼問題
import csv
with open('data.csv', 'r', encoding='gb18030') as f:
reader = csv.reader(f)
for row in reader:
ws.append(row)
# 分塊處理大文件
def process_large_file(file_path, chunk_size=1000):
wb = openpyxl.load_workbook(file_path, read_only=True)
ws = wb.active
for i, row in enumerate(ws.iter_rows(), start=1):
process_row(row)
if i % chunk_size == 0:
print(f"已處理 {i} 行")
# 可在此處添加階段性保存邏輯
wb.close()
# 處理不同Excel版本
def save_for_old_excel(wb, filename):
# 保存為xls兼容格式
from openpyxl.writer.excel import save_virtual_workbook
from pyexcelerate import Workbook
data = []
for ws in wb.worksheets:
sheet_data = []
for row in ws.iter_rows():
sheet_data.append([cell.value for cell in row])
data.append((ws.title, sheet_data))
new_wb = Workbook()
for sheet_name, sheet_data in data:
new_wb.new_sheet(sheet_name, data=sheet_data)
new_wb.save(filename)
# 與pandas交互
def excel_to_pandas(filepath):
import pandas as pd
wb = openpyxl.load_workbook(filepath)
sheets = {}
for sheet_name in wb.sheetnames:
ws = wb[sheet_name]
data = ws.values
cols = next(data)
sheets[sheet_name] = pd.DataFrame(data, columns=cols)
return sheets
# 設置密碼保護
from openpyxl.workbook.protection import WorkbookProtection
wb.security = WorkbookProtection(
workbookPassword='secret',
lockStructure=True
)
# 保護單元格
for row in ws.iter_rows():
for cell in row:
cell.protection = Protection(locked=False)
ws.protection.sheet = True
# 注意:openpyxl不支持VBA宏的讀寫
# 可以使用xlwings或win32com實現
本文全面介紹了在Linux系統下使用openpyxl處理Excel數據的各種技術,從基礎操作到高級應用,涵蓋了日常工作中的大多數場景。通過掌握這些技能,您可以:
openpyxl作為Python生態中強大的Excel處理工具,與其他數據處理庫(如pandas、numpy等)配合使用,能夠在Linux環境下構建完整的數據處理解決方案。
”`
注:本文實際字數為約7500字(含代碼),完整涵蓋了openpyxl在Linux下的各種應用場景。如需調整內容或補充特定方面的細節,可以進一步擴展具體章節。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。