Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip values (moreso, RandomRuns) we have already tested #207

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Fuzz/Internal.elm
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ of the library.

import GenResult exposing (GenResult)
import PRNG exposing (PRNG)
import Random
import RandomRun


type Fuzzer a
Expand Down
201 changes: 128 additions & 73 deletions src/Test/Fuzz.elm
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import MicroListExtra as List
import MicroMaybeExtra as Maybe
import PRNG
import Random
import RandomRun exposing (RandomRun)
import Set exposing (Set)
import Simplify
import Test.Distribution exposing (DistributionReport(..))
import Test.Distribution.Internal exposing (Distribution(..), ExpectedDistribution(..))
Expand All @@ -29,26 +31,30 @@ fuzzTest distribution fuzzer untrimmedDesc getExpectation =
blankDescriptionFailure

else
ElmTestVariant__Labeled desc <| validatedFuzzTest fuzzer getExpectation distribution
ElmTestVariant__Labeled desc <| validatedFuzzTest desc fuzzer getExpectation distribution


{-| Knowing that the fuzz test isn't obviously invalid, run the test and package up the results.
-}
validatedFuzzTest : Fuzzer a -> (a -> Expectation) -> Distribution a -> Test
validatedFuzzTest fuzzer getExpectation distribution =
validatedFuzzTest : String -> Fuzzer a -> (a -> Expectation) -> Distribution a -> Test
validatedFuzzTest description fuzzer getExpectation distribution =
ElmTestVariant__FuzzTest
(\seed runs ->
let
constants : LoopConstants a
constants =
{ fuzzer = fuzzer
, testFn = getExpectation
, initialSeed = seed
, runsNeeded = runs
, distribution = distribution
, skipsAllowed = runs * maxSkippedRunsRatio
, description = description
}

runResult : RunResult
runResult =
fuzzLoop
{ fuzzer = fuzzer
, testFn = getExpectation
, initialSeed = seed
, runsNeeded = runs
, distribution = distribution
}
(initLoopState seed distribution)
fuzzLoop constants (initLoopState constants)
in
case runResult.failure of
Nothing ->
Expand Down Expand Up @@ -77,6 +83,8 @@ type alias LoopConstants a =
, initialSeed : Random.Seed
, runsNeeded : Int
, distribution : Distribution a
, skipsAllowed : Int
, description : String
}


Expand All @@ -86,15 +94,17 @@ type alias LoopState =
, nextPowerOfTwo : Int
, failure : Maybe Failure
, currentSeed : Random.Seed
, seenRandomRuns : Set (List Int)
, runsSkipped : Int
}


initLoopState : Random.Seed -> Distribution a -> LoopState
initLoopState initialSeed distribution =
initLoopState : LoopConstants a -> LoopState
initLoopState c =
let
initialDistributionCount : Maybe (Dict (List String) Int)
initialDistributionCount =
Test.Distribution.Internal.getDistributionLabels distribution
Test.Distribution.Internal.getDistributionLabels c.distribution
|> Maybe.map
(\labels ->
labels
Expand All @@ -106,10 +116,29 @@ initLoopState initialSeed distribution =
, distributionCount = initialDistributionCount
, nextPowerOfTwo = 1
, failure = Nothing
, currentSeed = initialSeed
, currentSeed = c.initialSeed
, seenRandomRuns = Set.empty
, runsSkipped = 0
}


{-| If user specified 100 runs and this ratio is 2, we can only skip 100\*2 =
200 values before stopping.

Consider `Fuzz.bool`: it only has two possible RandomRuns:

- [ 0 ] --> False
- [ 1 ] --> True

We'll likely try those pretty soon. We don't have a good way of figuring out
that's all of them so we'll just skip them for until 200 values have been tried.

-}
maxSkippedRunsRatio : Int
maxSkippedRunsRatio =
2


{-| Runs fuzz tests repeatedly and returns information about distribution and possible failure.

The loop algorithm is roughly:
Expand Down Expand Up @@ -151,13 +180,13 @@ fuzzLoop c state =
Just distributionCount ->
DistributionToReport
{ distributionCount = includeCombinationsInBaseCounts distributionCount
, runsElapsed = state.runsElapsed
, runsElapsed = state.runsElapsed + state.runsSkipped
}
, failure = Just failure
}

Nothing ->
if state.runsElapsed < c.runsNeeded then
if state.runsElapsed < c.runsNeeded && state.runsSkipped < c.skipsAllowed then
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't there be some sort of distinct failure when the skipsAllowed is exceeded? That basically means your Fuzzer is kind of rubbish, no?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

skipsAllowed exceeded could mean you've exhausted all possible values (or at least, finding new values is improbable). Consider Fuzz.bool or Fuzz.intRange 1 10. Over 100 runs those could skip 98 times and 90 times respectively (depending on your settings)

Copy link
Collaborator Author

@Janiczek Janiczek Oct 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this reason I don't think we should fail. If anything, we could stop early, but let's do that when implementing exhaustiveness? This way skipping basically only lets you fuzz more diverse inputs / not waste time on things you already know answer to

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we can punt on this question until we implement exhaustiveness. Because in those cases the test runner should just switch to exhaustive checking, so this kind of failure should be reserved for more pathological fuzzers:

Fuzz.floatRange 0 1
|> Fuzz.andThen (\p ->
 if p < 0.9 then 
    Fuzz.constant 0
 else
    Fuzz.intRange 0 100
)

which would just be kind of bad, but not really exhaustive...

let
newState : LoopState
newState =
Expand All @@ -182,7 +211,7 @@ fuzzLoop c state =
{ distributionReport =
DistributionToReport
{ distributionCount = includeCombinationsInBaseCounts distributionCount
, runsElapsed = state.runsElapsed
, runsElapsed = state.runsElapsed + state.runsSkipped
}
, failure = Nothing
}
Expand Down Expand Up @@ -210,7 +239,7 @@ fuzzLoop c state =
{ distributionReport =
DistributionCheckSucceeded
{ distributionCount = distributionCount
, runsElapsed = state.runsElapsed
, runsElapsed = state.runsElapsed + state.runsSkipped
}
, failure = Nothing
}
Expand Down Expand Up @@ -282,7 +311,7 @@ allSufficientlyCovered c state normalizedDistributionCount =
True

AtLeast n ->
Test.Distribution.Internal.sufficientlyCovered state.runsElapsed count (n / 100)
Test.Distribution.Internal.sufficientlyCovered (state.runsElapsed + state.runsSkipped) count (n / 100)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to include the skipped runs in the distribution coverage? I'm not saying no, but I think it's a question with a non-obvious answer...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the answer should be yes: we already know what the fuzzed value was when skipping, so we can count it into the fuzzer distribution. What we skipped was running the test function.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But is it valuable information? Imagine that I want to classify even/odd, and my generator generates the following:

0, 1, 2, 4, 1, 6, 1, 8, 1, 10, 1, 12, 1

Then I'll see that I had a an 50%/50% distribution of even/odd, but does it give me any more confidence about what I'm testing?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe fuzzer distribution is about classifying what the fuzzer gives you, irregardless of the test function (... -> Expectation). ie. you could just as well Fuzz.examples 10000 myFuzzer in the REPL and then do some statistics on that, and get the same result.

Test skipping only deals with skipping the test function, not skipping the fuzzing. This PR shouldn't change the behaviour of the fuzzer distribution reporting.

I believe I understand your issue, but it seems to me it's unrelated to this PR.

Copy link
Contributor

@gampleman gampleman Oct 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it seems to me it's unrelated to this PR

That's fair enough. I think the main relation here is that the implementation here could implement both concerns by simultaneously being simpler. However, if you don't feel like wading into those waters in this PR, that's fine.

Copy link
Collaborator Author

@Janiczek Janiczek Oct 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the main relation here is that the implementation here could implement both concerns by simultaneously being simpler.

I think you're suggesting we remove skipped values from the distribution counts. But wouldn't that invalidate the claim that the distribution counts deal with what the fuzzer returns (again, thinking about Fuzz.examples 10000 myFuzzer)? That would change it into "unique values the fuzzer returns". I don't know if that's helpful.

It would basically change

Distribution report:
====================
  2-19:         90.1%  (27007x)  ███████████████████████████░░░
  1:               5%   (1485x)  █░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
  20:            4.9%   (1468x)  █░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
  <1, >20:         0%      (0x)  ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░

into

Distribution report:
====================
  2-19:           90%  (18x)  ███████████████████████████░░░
  1:               5%   (1x)  █░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
  20:              5%   (1x)  █░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
  <1, >20:         0%   (0x)  ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░

That to me is a different feature 🤷

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unique values is what we care about though isn't it? So distribution of unique values is the valuable thing that we should be highlighting to developers.

Also I disagree that distributions are just about the Fuzzer - they are about giving you confidence that your fuzz test is adequately sampling the problem space and not just focusing on some particular corner of it.


Additionally there could be some nice highlighting of overproduction of duplicates:

Distribution report:
====================
  2-19:           90%  (18x)  ███████████████████████████░░░
  1:               5%   (1x)  █░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ ⚠️ Fuzzer produces 1480 duplicated values
  20:              5%   (1x)  █░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
  <1, >20:         0%   (0x)  ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░

or some such, but that's perhaps just overcomplicating things for little benefit.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unique values is what we care about though isn't it? So distribution of unique values is the valuable thing that we should be highlighting to developers.

That's where our understanding differs currently -- I'll think about it and give the idea a chance! Perhaps you're right

)
)
)
Expand Down Expand Up @@ -323,9 +352,9 @@ findBadZeroRelatedCase c state normalizedDistributionCount =
|> Maybe.map
(\count ->
{ label = label
, actualPercentage = toFloat count * 100 / toFloat state.runsElapsed
, actualPercentage = toFloat count * 100 / toFloat (state.runsElapsed + state.runsSkipped)
, expectedDistribution = expectedDistribution
, runsElapsed = state.runsElapsed
, runsElapsed = state.runsElapsed + state.runsSkipped
, distributionCount = distributionCount
}
)
Expand Down Expand Up @@ -369,14 +398,14 @@ findInsufficientlyCoveredLabel c state normalizedDistributionCount =
False

AtLeast n ->
Test.Distribution.Internal.insufficientlyCovered state.runsElapsed count (n / 100)
Test.Distribution.Internal.insufficientlyCovered (state.runsElapsed + state.runsSkipped) count (n / 100)
)
|> Maybe.map
(\( label, count, expectedDistribution ) ->
{ label = label
, actualPercentage = toFloat count * 100 / toFloat state.runsElapsed
, actualPercentage = toFloat count * 100 / toFloat (state.runsElapsed + state.runsSkipped)
, expectedDistribution = expectedDistribution
, runsElapsed = state.runsElapsed
, runsElapsed = state.runsElapsed + state.runsSkipped
, distributionCount = distributionCount
}
)
Expand Down Expand Up @@ -475,63 +504,89 @@ runOnce c state =

Nothing ->
stepSeed state.currentSeed

( maybeFailure, newDistributionCounter ) =
case genResult of
Rejected { reason } ->
( Just
in
case genResult of
Rejected { reason } ->
{ state
| failure =
Just
{ given = Nothing
, expectation =
Test.Expectation.fail
{ description = reason
, reason = Invalid InvalidFuzzer
}
}
, state.distributionCount
)
, currentSeed = nextSeed
, runsElapsed = state.runsElapsed + 1
}

Generated { prng, value } ->
let
failure : Maybe Failure
failure =
testGeneratedValue
{ getExpectation = c.testFn
, fuzzer = c.fuzzer
, randomRun = PRNG.getRun prng
, value = value
, expectation = c.testFn value
}
Generated { prng, value } ->
let
randomRun : RandomRun
randomRun =
PRNG.getRun prng

randomRunList : List Int
randomRunList =
RandomRun.toList randomRun

nextDistributionCount : Maybe (Dict (List String) Int)
nextDistributionCount =
Maybe.map2
(\labels old ->
let
foundLabels : List String
foundLabels =
labels
|> List.filterMap
(\( label, predicate ) ->
if predicate value then
Just label

else
Nothing
)
in
Dict.increment foundLabels old
)
(Test.Distribution.Internal.getDistributionLabels c.distribution)
state.distributionCount
in
if Set.member randomRunList state.seenRandomRuns then
{- We've tried this RandomRun already and know it ends in Pass
(if it ended in Failure we'd have short-circuited).
Let's not do unneeded work!

We still want to update the distribution counts though,
those don't care about whether the value passes or fails the
test.
-}
{ state
| currentSeed = nextSeed
, distributionCount = nextDistributionCount
, runsSkipped = state.runsSkipped + 1
}

distributionCounter : Maybe (Dict (List String) Int)
distributionCounter =
Maybe.map2
(\labels old ->
let
foundLabels : List String
foundLabels =
labels
|> List.filterMap
(\( label, predicate ) ->
if predicate value then
Just label

else
Nothing
)
in
Dict.increment foundLabels old
)
(Test.Distribution.Internal.getDistributionLabels c.distribution)
state.distributionCount
in
( failure, distributionCounter )
in
{ state
| failure = maybeFailure
, distributionCount = newDistributionCounter
, currentSeed = nextSeed
, runsElapsed = state.runsElapsed + 1
}
else
let
nextFailure : Maybe Failure
nextFailure =
testGeneratedValue
{ getExpectation = c.testFn
, fuzzer = c.fuzzer
, randomRun = randomRun
, value = value
, expectation = c.testFn value
}
in
{ state
| failure = nextFailure
, distributionCount = nextDistributionCount
, currentSeed = nextSeed
, runsElapsed = state.runsElapsed + 1
, seenRandomRuns = Set.insert randomRunList state.seenRandomRuns
}


includeCombinationsInBaseCounts : Dict (List String) Int -> Dict (List String) Int
Expand Down
16 changes: 16 additions & 0 deletions tests/src/FuzzerTests.elm
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,22 @@ distributionTests =
(Fuzz.intRange 1 20)
"Int range boundaries - mandatory"
(\n -> Expect.pass)
, Test.skip <|
-- we don't want failing tests around by default
Test.fuzzWith
{ runs = 10000
, distribution =
Test.expectDistribution
[ ( Test.Distribution.atLeast 5.5, "low", \n -> n == 1 )
, ( Test.Distribution.atLeast 4, "high", \n -> n == 20 )
, ( Test.Distribution.atLeast 80, "in between", \n -> n > 1 && n < 20 )
, ( Test.Distribution.zero, "outside", \n -> n < 1 || n > 20 )
, ( Test.Distribution.moreThanZero, "one", \n -> n == 1 )
]
}
(Fuzz.intRange 1 20)
"Failing distribution test"
(\n -> Expect.pass)
]


Expand Down
Loading