GCCの各オプティマイズレベルで有効になる最適化を調べる
GCC 4.4ぐらいからみたいなのですが、-Qオプションを使うと、
最適化等でどのオプションが有効になるかがわかるみたいです。
具体的には以下のようなコマンドです。
% gcc -O2 -Q --help=optimize # 個々のオプションについて 'enabled' or 'disabled'が確認できる
それで各レベルでの違いを見てみました。
手作業でまとめるのは面倒なので、以下のようなスクリプトを走らせます。
#!/usr/bin/env perl
# Print a Hatena-notation table showing, for each GCC optimization level,
# which optimization options are enabled ('o') or disabled ('x') according
# to the output of `gcc <level> -Q --help=optimize`.
# An option that gcc did not report at a given level is printed as '-'.
use strict;
use warnings;

my @levels = qw(-O0 -O1 -O2 -O3 -Os);

# $optimize{$level}{$option} is 1 when gcc reports the option as enabled at
# that level and 0 when it reports anything else inside the brackets.
my %optimize;

# The parser below matches the literal English words "enabled"/"disabled",
# so force the C locale in case a localized gcc would translate them.
local $ENV{LC_ALL} = 'C';

for my $level (@levels) {
    # List-form pipe open: gcc is executed directly, without a shell.
    my @cmd = ('gcc', $level, '-Q', '--help=optimize');
    open my $fh, '-|', @cmd
        or die "Can't exec @cmd: $!";

    while (my $line = <$fh>) {
        chomp $line;
        next unless $line =~ m{(?:enabled|disabled)};
        if ($line =~ m{(\S+) \s+ \[ ([^\]]+) \]}xms) {
            my ($option, $flag) = ($1, $2);
            $optimize{$level}{$option} = $flag eq 'enabled' ? 1 : 0;
        }
    }

    # close() on a pipe waits for the child and returns false if the child
    # failed; $! is 0 in that case and $? holds the wait status.
    if (!close $fh) {
        die $!
            ? "Error closing pipe from @cmd: $!"
            : "@cmd failed (wait status $?)";
    }
}

# Collect the option names from every level, not only -O0, so that an
# option reported at just some levels still gets a row.
my %seen;
for my $level (@levels) {
    $seen{$_} = 1 for keys %{ $optimize{$level} || {} };
}

# Header row: an empty header cell followed by one header cell per level.
print "|*|";
print "*$_|" for @levels;
print "\n";

for my $key (sort keys %seen) {
    print "|$key|";
    for my $level (@levels) {
        my $state = $optimize{$level}{$key};
        my $mark  = !defined $state ? '-'
                  : $state          ? 'o'
                  :                   'x';
        print "$mark|";
    }
    print "\n";
}
これで、はてな記法の表が生成されます。
結果
Ubuntu 11.04の gcc-4.5.2 x64では以下のようになりました。
オプション | -O0 | -O1 | -O2 | -O3 | -Os |
---|---|---|---|---|---|
-falign-functions | x | x | o | o | o |
-falign-jumps | x | x | o | o | o |
-falign-labels | x | x | o | o | o |
-falign-loops | o | o | x | x | x |
-fargument-alias | o | o | o | o | o |
-fargument-noalias | x | x | x | x | x |
-fargument-noalias-anything | x | x | x | x | x |
-fargument-noalias-global | x | x | x | x | x |
-fasynchronous-unwind-tables | o | o | o | o | o |
-fbranch-count-reg | o | o | o | o | o |
-fbranch-probabilities | x | x | x | x | x |
-fbranch-target-load-optimize | x | x | x | x | x |
-fbranch-target-load-optimize2 | x | x | x | x | x |
-fbtr-bb-exclusive | x | x | x | x | x |
-fcaller-saves | x | x | o | o | o |
-fcommon | o | o | o | o | o |
-fconserve-stack | x | x | x | x | x |
-fcprop-registers | x | o | o | o | o |
-fcrossjumping | x | x | o | o | o |
-fcse-follow-jumps | x | x | o | o | o |
-fcx-fortran-rules | x | x | x | x | x |
-fcx-limited-range | x | x | x | x | x |
-fdata-sections | x | x | x | x | x |
-fdce | o | o | o | o | o |
-fdefer-pop | x | o | o | o | o |
-fdelayed-branch | x | x | x | x | x |
-fdelete-null-pointer-checks | o | o | o | o | o |
-fdse | o | o | o | o | o |
-fearly-inlining | o | o | o | o | o |
-fexceptions | x | x | x | x | x |
-fexpensive-optimizations | x | x | o | o | o |
-fextension-elimination | x | x | o | o | o |
-ffinite-math-only | x | x | x | x | x |
-ffloat-store | x | x | x | x | x |
-fforward-propagate | x | o | o | o | o |
-fgcse | x | x | o | o | o |
-fgcse-after-reload | x | x | x | o | x |
-fgcse-las | x | x | x | x | x |
-fgcse-lm | o | o | o | o | o |
-fgcse-sm | x | x | x | x | x |
-fgraphite-identity | x | x | x | x | x |
-fguess-branch-probability | x | o | o | o | o |
-fif-conversion | x | o | o | o | o |
-fif-conversion2 | x | o | o | o | o |
-finline-functions | x | x | x | o | o |
-finline-functions-called-once | o | o | o | o | o |
-finline-small-functions | x | x | o | o | o |
-fipa-cp | x | x | o | o | o |
-fipa-cp-clone | x | x | x | o | x |
-fipa-matrix-reorg | x | x | x | x | x |
-fipa-pta | x | x | x | x | x |
-fipa-pure-const | x | o | o | o | o |
-fipa-reference | x | o | o | o | o |
-fipa-sra | x | x | o | o | o |
-fipa-type-escape | x | x | x | x | x |
-fivopts | o | o | o | o | o |
-fjump-tables | o | o | o | o | o |
-floop-block | x | x | x | x | x |
-floop-interchange | x | x | x | x | x |
-floop-parallelize-all | x | x | x | x | x |
-floop-strip-mine | x | x | x | x | x |
-flto-report | x | x | x | x | x |
-fltrans | x | x | x | x | x |
-fmath-errno | o | o | o | o | o |
-fmerge-all-constants | x | x | x | x | x |
-fmerge-constants | x | o | o | o | o |
-fmodulo-sched | x | x | x | x | x |
-fmove-loop-invariants | o | o | o | o | o |
-fnon-call-exceptions | x | x | x | x | x |
-fomit-frame-pointer | x | o | o | o | o |
-foptimize-register-move | x | x | o | o | o |
-foptimize-sibling-calls | x | x | o | o | o |
-fpack-struct | x | x | x | x | x |
-fpeel-loops | x | x | x | x | x |
-fpeephole | o | o | o | o | o |
-fpeephole2 | x | x | o | o | o |
-fpredictive-commoning | x | x | x | o | x |
-fprefetch-loop-arrays | x | x | x | x | x |
-freg-struct-return | x | x | x | x | x |
-fregmove | x | x | o | o | o |
-frename-registers | o | o | o | o | o |
-freorder-blocks | x | x | o | o | o |
-freorder-blocks-and-partition | x | x | x | x | x |
-freorder-functions | x | x | o | o | o |
-frerun-cse-after-loop | x | x | o | o | o |
-freschedule-modulo-scheduled-loops | x | x | x | x | x |
-frounding-math | x | x | x | x | x |
-fsched-critical-path-heuristic | o | o | o | o | o |
-fsched-dep-count-heuristic | o | o | o | o | o |
-fsched-group-heuristic | o | o | o | o | o |
-fsched-interblock | o | o | o | o | o |
-fsched-last-insn-heuristic | o | o | o | o | o |
-fsched-pressure | x | x | x | x | x |
-fsched-rank-heuristic | o | o | o | o | o |
-fsched-spec | o | o | o | o | o |
-fsched-spec-insn-heuristic | o | o | o | o | o |
-fsched-spec-load | x | x | x | x | x |
-fsched-spec-load-dangerous | x | x | x | x | x |
-fsched-stalled-insns | x | x | x | x | x |
-fsched-stalled-insns-dep | o | o | o | o | o |
-fsched2-use-superblocks | x | x | x | x | x |
-fschedule-insns | x | x | x | x | x |
-fschedule-insns2 | x | x | o | o | o |
-fsection-anchors | x | x | x | x | x |
-fsel-sched-pipelining | x | x | x | x | x |
-fsel-sched-pipelining-outer-loops | x | x | x | x | x |
-fsel-sched-reschedule-pipelined | x | x | x | x | x |
-fselective-scheduling | x | x | x | x | x |
-fselective-scheduling2 | x | x | x | x | x |
-fshrink-wrap | x | x | x | x | x |
-fsignaling-nans | x | x | x | x | x |
-fsigned-zeros | o | o | o | o | o |
-fsingle-precision-constant | x | x | x | x | x |
-fsplit-ivs-in-unroller | o | o | o | o | o |
-fsplit-wide-types | x | o | o | o | o |
-fstrict-aliasing | x | x | o | o | o |
-fthread-jumps | x | x | o | o | o |
-ftoplevel-reorder | o | o | o | o | o |
-ftrapping-math | o | o | o | o | o |
-ftrapv | x | x | x | x | x |
-ftree-builtin-call-dce | x | x | o | o | o |
-ftree-ccp | x | o | o | o | o |
-ftree-ch | x | o | o | o | o |
-ftree-copy-prop | x | o | o | o | o |
-ftree-copyrename | x | o | o | o | o |
-ftree-cselim | o | o | o | o | o |
-ftree-dce | x | o | o | o | o |
-ftree-dominator-opts | x | o | o | o | o |
-ftree-dse | x | o | o | o | o |
-ftree-forwprop | o | o | o | o | o |
-ftree-fre | x | o | o | o | o |
-ftree-if-to-switch-conversion | x | x | o | o | o |
-ftree-loop-distribution | x | x | x | x | x |
-ftree-loop-im | o | o | o | o | o |
-ftree-loop-ivcanon | o | o | o | o | o |
-ftree-loop-linear | x | x | x | x | x |
-ftree-loop-optimize | o | o | o | o | o |
-ftree-lrs | x | x | x | x | x |
-ftree-phiprop | o | o | o | o | o |
-ftree-pre | x | x | o | o | o |
-ftree-pta | o | o | o | o | o |
-ftree-reassoc | o | o | o | o | o |
-ftree-scev-cprop | o | o | o | o | o |
-ftree-sink | x | o | o | o | o |
-ftree-slp-vectorize | o | o | o | o | o |
-ftree-sra | x | o | o | o | o |
-ftree-switch-conversion | x | x | o | o | o |
-ftree-ter | x | o | o | o | o |
-ftree-vect-loop-version | o | o | o | o | o |
-ftree-vectorize | x | x | x | o | x |
-ftree-vrp | x | x | o | o | o |
-funit-at-a-time | o | o | o | o | o |
-funroll-all-loops | x | x | x | x | x |
-funroll-loops | x | x | x | x | x |
-funsafe-loop-optimizations | x | x | x | x | x |
-funsafe-math-optimizations | x | x | x | x | x |
-funswitch-loops | x | x | x | o | x |
-funwind-tables | x | x | x | x | x |
-fvar-tracking | o | o | o | o | o |
-fvar-tracking-assignments | o | o | o | o | o |
-fvar-tracking-assignments-toggle | x | x | x | x | x |
-fvar-tracking-uninit | x | x | x | x | x |
-fvariable-expansion-in-unroller | x | x | x | x | x |
-fvect-cost-model | o | o | o | o | o |
-fvpt | x | x | x | x | x |
-fweb | o | o | o | o | o |
-fwhole-program | x | x | x | x | x |
-fwpa | x | x | x | x | x |
-fwrapv | x | x | x | x | x |
O3だから全部が有効になるというわけではないですね。
意外と有効にならないものもあるんだなというのがわかりました。
個別にどんな効果があるのか調べてみると面白いかもしれません。