Skip values (moreso, RandomRuns) we have already tested #207
@@ -8,6 +8,8 @@ import MicroListExtra as List
import MicroMaybeExtra as Maybe
import PRNG
import Random
+import RandomRun exposing (RandomRun)
+import Set exposing (Set)
import Simplify
import Test.Distribution exposing (DistributionReport(..))
import Test.Distribution.Internal exposing (Distribution(..), ExpectedDistribution(..))
@@ -29,26 +31,30 @@ fuzzTest distribution fuzzer untrimmedDesc getExpectation =
        blankDescriptionFailure

    else
-       ElmTestVariant__Labeled desc <| validatedFuzzTest fuzzer getExpectation distribution
+       ElmTestVariant__Labeled desc <| validatedFuzzTest desc fuzzer getExpectation distribution


{-| Knowing that the fuzz test isn't obviously invalid, run the test and package up the results.
-}
-validatedFuzzTest : Fuzzer a -> (a -> Expectation) -> Distribution a -> Test
-validatedFuzzTest fuzzer getExpectation distribution =
+validatedFuzzTest : String -> Fuzzer a -> (a -> Expectation) -> Distribution a -> Test
+validatedFuzzTest description fuzzer getExpectation distribution =
    ElmTestVariant__FuzzTest
        (\seed runs ->
            let
+               constants : LoopConstants a
+               constants =
+                   { fuzzer = fuzzer
+                   , testFn = getExpectation
+                   , initialSeed = seed
+                   , runsNeeded = runs
+                   , distribution = distribution
+                   , skipsAllowed = runs * maxSkippedRunsRatio
+                   , description = description
+                   }
+
                runResult : RunResult
                runResult =
-                   fuzzLoop
-                       { fuzzer = fuzzer
-                       , testFn = getExpectation
-                       , initialSeed = seed
-                       , runsNeeded = runs
-                       , distribution = distribution
-                       }
-                       (initLoopState seed distribution)
+                   fuzzLoop constants (initLoopState constants)
            in
            case runResult.failure of
                Nothing ->
@@ -77,6 +83,8 @@ type alias LoopConstants a =
    , initialSeed : Random.Seed
    , runsNeeded : Int
    , distribution : Distribution a
+   , skipsAllowed : Int
+   , description : String
    }

@@ -86,15 +94,17 @@ type alias LoopState =
    , nextPowerOfTwo : Int
    , failure : Maybe Failure
    , currentSeed : Random.Seed
+   , seenRandomRuns : Set (List Int)
+   , runsSkipped : Int
    }


-initLoopState : Random.Seed -> Distribution a -> LoopState
-initLoopState initialSeed distribution =
+initLoopState : LoopConstants a -> LoopState
+initLoopState c =
    let
        initialDistributionCount : Maybe (Dict (List String) Int)
        initialDistributionCount =
-           Test.Distribution.Internal.getDistributionLabels distribution
+           Test.Distribution.Internal.getDistributionLabels c.distribution
                |> Maybe.map
                    (\labels ->
                        labels
@@ -106,10 +116,29 @@ initLoopState initialSeed distribution =
    , distributionCount = initialDistributionCount
    , nextPowerOfTwo = 1
    , failure = Nothing
-   , currentSeed = initialSeed
+   , currentSeed = c.initialSeed
+   , seenRandomRuns = Set.empty
+   , runsSkipped = 0
    }


+{-| If the user specified 100 runs and this ratio is 2, we can only skip
+100 * 2 = 200 values before stopping.
+
+Consider `Fuzz.bool`: it only has two possible RandomRuns:
+
+  - [ 0 ] --> False
+  - [ 1 ] --> True
+
+We'll likely try those pretty soon. We don't have a good way of figuring out
+that that's all of them, so we'll just keep skipping them until 200 values have
+been tried.
+
+-}
+maxSkippedRunsRatio : Int
+maxSkippedRunsRatio =
+    2


{-| Runs fuzz tests repeatedly and returns information about distribution and possible failure.

The loop algorithm is roughly:
@@ -151,13 +180,13 @@ fuzzLoop c state =
                        Just distributionCount ->
                            DistributionToReport
                                { distributionCount = includeCombinationsInBaseCounts distributionCount
-                               , runsElapsed = state.runsElapsed
+                               , runsElapsed = state.runsElapsed + state.runsSkipped
                                }
                , failure = Just failure
                }

        Nothing ->
-           if state.runsElapsed < c.runsNeeded then
+           if state.runsElapsed < c.runsNeeded && state.runsSkipped < c.skipsAllowed then
                let
                    newState : LoopState
                    newState =
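As an editorial aside, not part of the diff: the changed condition above is where the new skip budget takes effect. Below is a minimal sketch of that stop/continue decision as a standalone function, with hypothetical record shapes standing in for the PR's LoopConstants and LoopState.

```elm
module LoopConditionSketch exposing (shouldContinue)


{-| Hypothetical, simplified stand-ins for the PR's LoopConstants and LoopState.
-}
shouldContinue :
    { runsNeeded : Int, skipsAllowed : Int }
    -> { runsElapsed : Int, runsSkipped : Int }
    -> Bool
shouldContinue c state =
    -- skipsAllowed is runsNeeded * maxSkippedRunsRatio, e.g. 100 * 2 = 200
    (state.runsElapsed < c.runsNeeded)
        && (state.runsSkipped < c.skipsAllowed)
```

For example, `shouldContinue { runsNeeded = 100, skipsAllowed = 200 } { runsElapsed = 10, runsSkipped = 200 }` is `False`: a fuzzer with only a handful of distinct values stops once 200 generations have been skipped, even though it never reaches 100 real runs.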
@@ -182,7 +211,7 @@ fuzzLoop c state =
                { distributionReport =
                    DistributionToReport
                        { distributionCount = includeCombinationsInBaseCounts distributionCount
-                       , runsElapsed = state.runsElapsed
+                       , runsElapsed = state.runsElapsed + state.runsSkipped
                        }
                , failure = Nothing
                }
@@ -210,7 +239,7 @@ fuzzLoop c state =
                { distributionReport =
                    DistributionCheckSucceeded
                        { distributionCount = distributionCount
-                       , runsElapsed = state.runsElapsed
+                       , runsElapsed = state.runsElapsed + state.runsSkipped
                        }
                , failure = Nothing
                }
@@ -282,7 +311,7 @@ allSufficientlyCovered c state normalizedDistributionCount =
                            True

                        AtLeast n ->
-                            Test.Distribution.Internal.sufficientlyCovered state.runsElapsed count (n / 100)
+                            Test.Distribution.Internal.sufficientlyCovered (state.runsElapsed + state.runsSkipped) count (n / 100)

Review thread on this line:

Reviewer: Does it make sense to include the skipped runs in the distribution coverage? I'm not saying no, but I think it's a question with a non-obvious answer...

Author: I think the answer should be yes: we already know what the fuzzed value was when skipping, so we can count it into the fuzzer distribution. What we skipped was running the test function.

Reviewer: But is it valuable information? Imagine that I want to classify even/odd, and my generator generates the following: … Then I'll see that I had a 50%/50% distribution of even/odd, but does it give me any more confidence about what I'm testing?

Author: I believe fuzzer distribution is about classifying what the fuzzer gives you, regardless of the test function. Test skipping only deals with skipping the test function, not skipping the fuzzing. This PR shouldn't change the behaviour of the fuzzer distribution reporting. I believe I understand your issue, but it seems to me it's unrelated to this PR.

Reviewer: That's fair enough. I think the main relation here is that the implementation could address both concerns while simultaneously being simpler. However, if you don't feel like wading into those waters in this PR, that's fine.

Author: I think you're suggesting we remove skipped values from the distribution counts. But wouldn't that invalidate the claim that the distribution counts deal with what the fuzzer returns? It would basically change "distribution of values the fuzzer generated" into "distribution of unique values the fuzzer generated". That to me is a different feature 🤷

Reviewer: Unique values are what we care about though, aren't they? So the distribution of unique values is the valuable thing that we should be highlighting to developers. Also, I disagree that distributions are just about the Fuzzer: they are about giving you confidence that your fuzz test is adequately sampling the problem space and not just focusing on some particular corner of it. Additionally, there could be some nice highlighting of overproduction of duplicates, or some such, but that's perhaps just overcomplicating things for little benefit.

Author: That's where our understanding differs currently -- I'll think about it and give the idea a chance! Perhaps you're right.
                )
            )
        )
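To ground the even/odd example from the thread above: a user-level distribution check could look roughly like the sketch below. This is not code from the PR, and it assumes the elm-explorations/test 2.x API from memory (`Test.fuzzWith` with a `distribution` field, `Test.expectDistribution`, `Test.Distribution.atLeast`), so treat the exact names as unverified.

```elm
module EvenOddDistributionSketch exposing (evenOddTest)

import Expect
import Fuzz
import Test exposing (Test)
import Test.Distribution


evenOddTest : Test
evenOddTest =
    Test.fuzzWith
        { runs = 100

        -- The thread's question: should skipped (already seen) generations
        -- count towards these percentages? The PR answers yes.
        , distribution =
            Test.expectDistribution
                [ ( Test.Distribution.atLeast 30, "even", \n -> modBy 2 n == 0 )
                , ( Test.Distribution.atLeast 30, "odd", \n -> modBy 2 n == 1 )
                ]
        }
        (Fuzz.intRange 0 100)
        "classifying small ints"
        (\n -> n |> Expect.atLeast 0)
```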
@@ -323,9 +352,9 @@ findBadZeroRelatedCase c state normalizedDistributionCount =
        |> Maybe.map
            (\count ->
                { label = label
-               , actualPercentage = toFloat count * 100 / toFloat state.runsElapsed
+               , actualPercentage = toFloat count * 100 / toFloat (state.runsElapsed + state.runsSkipped)
                , expectedDistribution = expectedDistribution
-               , runsElapsed = state.runsElapsed
+               , runsElapsed = state.runsElapsed + state.runsSkipped
                , distributionCount = distributionCount
                }
            )
@@ -369,14 +398,14 @@ findInsufficientlyCoveredLabel c state normalizedDistributionCount =
                            False

                        AtLeast n ->
-                            Test.Distribution.Internal.insufficientlyCovered state.runsElapsed count (n / 100)
+                            Test.Distribution.Internal.insufficientlyCovered (state.runsElapsed + state.runsSkipped) count (n / 100)
                )
        |> Maybe.map
            (\( label, count, expectedDistribution ) ->
                { label = label
-               , actualPercentage = toFloat count * 100 / toFloat state.runsElapsed
+               , actualPercentage = toFloat count * 100 / toFloat (state.runsElapsed + state.runsSkipped)
                , expectedDistribution = expectedDistribution
-               , runsElapsed = state.runsElapsed
+               , runsElapsed = state.runsElapsed + state.runsSkipped
                , distributionCount = distributionCount
                }
            )
@@ -475,63 +504,89 @@ runOnce c state =

                Nothing ->
                    stepSeed state.currentSeed

-       ( maybeFailure, newDistributionCounter ) =
-           case genResult of
-               Rejected { reason } ->
-                   ( Just
+   in
+   case genResult of
+       Rejected { reason } ->
+           { state
+               | failure =
+                   Just
                        { given = Nothing
                        , expectation =
                            Test.Expectation.fail
                                { description = reason
                                , reason = Invalid InvalidFuzzer
                                }
                        }
-                   , state.distributionCount
-                   )
+               , currentSeed = nextSeed
+               , runsElapsed = state.runsElapsed + 1
+           }

-               Generated { prng, value } ->
-                   let
-                       failure : Maybe Failure
-                       failure =
-                           testGeneratedValue
-                               { getExpectation = c.testFn
-                               , fuzzer = c.fuzzer
-                               , randomRun = PRNG.getRun prng
-                               , value = value
-                               , expectation = c.testFn value
-                               }
+       Generated { prng, value } ->
+           let
+               randomRun : RandomRun
+               randomRun =
+                   PRNG.getRun prng
+
+               randomRunList : List Int
+               randomRunList =
+                   RandomRun.toList randomRun
+
+               nextDistributionCount : Maybe (Dict (List String) Int)
+               nextDistributionCount =
+                   Maybe.map2
+                       (\labels old ->
+                           let
+                               foundLabels : List String
+                               foundLabels =
+                                   labels
+                                       |> List.filterMap
+                                           (\( label, predicate ) ->
+                                               if predicate value then
+                                                   Just label
+
+                                               else
+                                                   Nothing
+                                           )
+                           in
+                           Dict.increment foundLabels old
+                       )
+                       (Test.Distribution.Internal.getDistributionLabels c.distribution)
+                       state.distributionCount
+           in
+           if Set.member randomRunList state.seenRandomRuns then
+               {- We've tried this RandomRun already and know it ends in Pass
+                  (if it ended in Failure we'd have short-circuited).
+                  Let's not do unneeded work!
+
+                  We still want to update the distribution counts though,
+                  those don't care about whether the value passes or fails the
+                  test.
+               -}
+               { state
+                   | currentSeed = nextSeed
+                   , distributionCount = nextDistributionCount
+                   , runsSkipped = state.runsSkipped + 1
+               }

-                       distributionCounter : Maybe (Dict (List String) Int)
-                       distributionCounter =
-                           Maybe.map2
-                               (\labels old ->
-                                   let
-                                       foundLabels : List String
-                                       foundLabels =
-                                           labels
-                                               |> List.filterMap
-                                                   (\( label, predicate ) ->
-                                                       if predicate value then
-                                                           Just label
-
-                                                       else
-                                                           Nothing
-                                                   )
-                                   in
-                                   Dict.increment foundLabels old
-                               )
-                               (Test.Distribution.Internal.getDistributionLabels c.distribution)
-                               state.distributionCount
-                   in
-                   ( failure, distributionCounter )
-   in
-   { state
-       | failure = maybeFailure
-       , distributionCount = newDistributionCounter
-       , currentSeed = nextSeed
-       , runsElapsed = state.runsElapsed + 1
-   }
+           else
+               let
+                   nextFailure : Maybe Failure
+                   nextFailure =
+                       testGeneratedValue
+                           { getExpectation = c.testFn
+                           , fuzzer = c.fuzzer
+                           , randomRun = randomRun
+                           , value = value
+                           , expectation = c.testFn value
+                           }
+               in
+               { state
+                   | failure = nextFailure
+                   , distributionCount = nextDistributionCount
+                   , currentSeed = nextSeed
+                   , runsElapsed = state.runsElapsed + 1
+                   , seenRandomRuns = Set.insert randomRunList state.seenRandomRuns
+               }


includeCombinationsInBaseCounts : Dict (List String) Int -> Dict (List String) Int
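For readers who want the skipping idea from the Generated branch in isolation, here is a stripped-down, hypothetical sketch (the State alias and step function below are invented for illustration; the real code lives in the diff above):

```elm
module SkipSketch exposing (State, step)

import Set exposing (Set)


type alias State =
    { seenRandomRuns : Set (List Int)
    , runsElapsed : Int
    , runsSkipped : Int
    , failure : Maybe String
    }


{-| One loop iteration: `randomRunList` identifies the generated value
(the serialized RandomRun), and `runTest` is the possibly expensive user test.
-}
step : List Int -> (() -> Maybe String) -> State -> State
step randomRunList runTest state =
    if Set.member randomRunList state.seenRandomRuns then
        -- Seen before; a failure would already have stopped the loop,
        -- so this value is known to pass. Only count the skip.
        { state | runsSkipped = state.runsSkipped + 1 }

    else
        { state
            | failure = runTest ()
            , runsElapsed = state.runsElapsed + 1
            , seenRandomRuns = Set.insert randomRunList state.seenRandomRuns
        }
```

The real implementation additionally threads the seed and the distribution counts through each step, and keys the set by `RandomRun.toList`, exactly as in the diff.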
Review thread:

Reviewer: Shouldn't there be some sort of distinct failure when the `skipsAllowed` is exceeded? That basically means your `Fuzzer` is kind of rubbish, no?

Author: `skipsAllowed` being exceeded could mean you've exhausted all possible values (or at least, that finding new values is improbable). Consider `Fuzz.bool` or `Fuzz.intRange 1 10`. Over 100 runs those could skip 98 times and 90 times respectively (depending on your settings).

Author: For this reason I don't think we should fail. If anything, we could stop early, but let's do that when implementing exhaustiveness? This way skipping basically only lets you fuzz more diverse inputs / not waste time on things you already know the answer to.

Reviewer: I guess we can punt on this question until we implement exhaustiveness. Because in those cases the test runner should just switch to exhaustive checking, so this kind of failure should be reserved for more pathological fuzzers: … which would just be kind of bad, but not really exhaustive...
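As a hedged illustration of the `Fuzz.bool` / `Fuzz.intRange 1 10` cases discussed above, here are two ordinary fuzz tests over tiny value domains. The skipping happens inside the runner, not in user code, and the numbers in the comments assume the default 100 runs and this PR's `maxSkippedRunsRatio` of 2.

```elm
module TinyDomainSketch exposing (suite)

import Expect
import Fuzz
import Test exposing (Test)


suite : Test
suite =
    Test.describe "fuzzers with tiny value domains"
        [ -- Per the PR's doc comment, Fuzz.bool has only two distinct
          -- RandomRuns ([ 0 ] and [ 1 ]), so almost every generation is a
          -- repeat; the loop stops once 200 repeats have been skipped.
          Test.fuzz Fuzz.bool "double negation" <|
            \b -> not (not b) |> Expect.equal b
        , -- Fuzz.intRange 1 10 has about ten distinct values; the thread
          -- above estimates roughly 90 of 100 generations being repeats.
          Test.fuzz (Fuzz.intRange 1 10) "respects the lower bound" <|
            \n -> n |> Expect.atLeast 1
        ]
```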