F# Data


F# Data: Anonymizing JSON

This tutorial shows how to implement an anonymizer for a JSON document (represented using the JsonValue type discussed in the JSON parser article). This functionality is not directly available in the F# Data library, but it can be very easily implemented by recursively walking over the JSON document.

If you want to use the JSON anonymizer in your code, you can copy the source from GitHub and just include it in your project. If you use these functions often and would like to see them in the F# Data library, please submit a feature request.

DISCLAIMER: Don't use this for sensitive data, as it's just a sample.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
75: 
76: 
77: 
78: 
79: 
#r "../../../bin/lib/net45/FSharp.Data.dll"
open System
open System.Globalization
open FSharp.Data

/// Anonymizes a JSON document by replacing the characters of its values with
/// random characters of the same class (digit, lower-case or upper-case
/// letter) while keeping the document structure and property names intact.
///
/// propertiesToSkip: names of record properties whose values are copied
///   through unchanged (including everything nested below them).
/// valuesToSkip: string values that are copied through unchanged.
///
/// Note: randomized digits of a number that is already close to the limits of
/// its type may fall outside the representable range when parsed back.
type JsonAnonymizer(?propertiesToSkip, ?valuesToSkip) = 

  let propertiesToSkip = Set.ofList (defaultArg propertiesToSkip [])
  let valuesToSkip = Set.ofList (defaultArg valuesToSkip [])

  let rng = Random()

  let digits = [| '0' .. '9' |]
  let lowerLetters = [| 'a' .. 'z' |]
  let upperLetters = [| 'A' .. 'Z' |]

  /// Picks a random digit from the `digits` table.
  let randomDigit () = digits.[rng.Next(digits.Length)]

  /// Maps a digit to a random digit and a letter to a random letter of the
  /// same case; every other character is returned unchanged.
  let getRandomChar (c:char) =
      if Char.IsDigit c then randomDigit ()
      elif Char.IsLetter c then
          if Char.IsLower c
          then lowerLetters.[rng.Next(lowerLetters.Length)]
          else upperLetters.[rng.Next(upperLetters.Length)]
      else c

  /// Randomizes every digit and letter of a string (used for string values).
  let randomize (str:string) =
      String(str.ToCharArray() |> Array.map getRandomChar)

  /// Randomizes only the digits of a string. Used for numeric text so that
  /// signs, decimal points and exponent markers survive and the result can
  /// still be parsed as a number.
  let randomizeDigits (str:string) =
      String(str.ToCharArray() 
             |> Array.map (fun c -> if Char.IsDigit c then randomDigit () else c))

  /// True when the inferred primitive type is one of the two "bit" types.
  let isBit (typ:Type) =
      typ = typeof<Runtime.StructuralTypes.Bit0> || 
      typ = typeof<Runtime.StructuralTypes.Bit1>

  /// Recursively walks the JSON value and returns its anonymized copy.
  let rec anonymize json =
      match json with
      | JsonValue.String s when valuesToSkip.Contains s -> json
      | JsonValue.String s ->
          let typ = 
            Runtime.StructuralInference.inferPrimitiveType 
              CultureInfo.InvariantCulture s

          let anonymized =
            if typ = typeof<Guid> then Guid.NewGuid().ToString()
            // Bit-like and date-like strings are kept as they are.
            elif isBit typ || typ = typeof<DateTime> then s
            else 
              // Keep a leading URL scheme readable; ordinal comparison so the
              // check does not depend on the current culture.
              let prefix, rest =
                if s.StartsWith("http://", StringComparison.Ordinal) then 
                  "http://", s.Substring("http://".Length)
                elif s.StartsWith("https://", StringComparison.Ordinal) then 
                  "https://", s.Substring("https://".Length)
                else "", s
              prefix + randomize rest
          JsonValue.String anonymized
      | JsonValue.Number d -> 
          // Format and parse with the invariant culture so the text matches
          // the culture handed to inferPrimitiveType and round-trips on
          // machines whose locale uses a different decimal separator.
          let text = d.ToString(CultureInfo.InvariantCulture)
          let typ = 
            Runtime.StructuralInference.inferPrimitiveType 
              CultureInfo.InvariantCulture text
          if isBit typ then json
          else 
            Decimal.Parse(randomizeDigits text, CultureInfo.InvariantCulture) 
            |> JsonValue.Number
      // NaN and infinities have no digits to randomize; their textual form
      // consists of letters only and must not be scrambled.
      | JsonValue.Float f when Double.IsNaN f || Double.IsInfinity f -> json
      | JsonValue.Float f -> 
          let text = f.ToString(CultureInfo.InvariantCulture)
          // Keep an exponent suffix (e.g. "E+20") untouched: scrambling the
          // 'E' made the text unparsable, and scrambling the exponent digits
          // would change the magnitude arbitrarily.
          let mantissa, exponent =
            match text.IndexOf('E') with
            | -1 -> text, ""
            | i -> text.Substring(0, i), text.Substring(i)
          Double.Parse(randomizeDigits mantissa + exponent, 
                       CultureInfo.InvariantCulture)
          |> JsonValue.Float
      | JsonValue.Boolean _  | JsonValue.Null -> json
      | JsonValue.Record props -> 
          props 
          |> Array.map (fun (key, value) -> 
              if propertiesToSkip.Contains key then key, value 
              else key, anonymize value)
          |> JsonValue.Record
      | JsonValue.Array array -> 
          array 
          |> Array.map anonymize 
          |> JsonValue.Array

  /// Returns an anonymized copy of the given JSON value.
  member __.Anonymize json = anonymize json

// Load the sample document and print it as-is.
// NOTE(review): the path is produced by plain string concatenation with no
// separator between __SOURCE_DIRECTORY__ and "../../data/..." - confirm that
// it resolves to the intended file on every platform.
let json = 
  let samplePath = __SOURCE_DIRECTORY__ + "../../data/TwitterStream.json"
  JsonValue.Load(samplePath)
json |> printfn "%O"

// Anonymize everything except the values of "lang" properties and print
// the result.
let anonymizedJson = 
  let anonymizer = JsonAnonymizer ["lang"]
  anonymizer.Anonymize json
anonymizedJson |> printfn "%O"

Related articles

namespace System
namespace System.Globalization
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
Multiple items
module JsonAnonymizer

--------------------
type JsonAnonymizer =
  new : ?propertiesToSkip:string list * ?valuesToSkip:string list -> JsonAnonymizer
  member Anonymize : json:JsonValue -> JsonValue

--------------------
new : ?propertiesToSkip:string list * ?valuesToSkip:string list -> JsonAnonymizer
val propertiesToSkip : string list option
val valuesToSkip : string list option
val propertiesToSkip : Set<string>
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IReadOnlyCollection<'T>
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  ...

--------------------
new : elements:seq<'T> -> Set<'T>
val ofList : elements:'T list -> Set<'T> (requires comparison)
val defaultArg : arg:'T option -> defaultValue:'T -> 'T
val valuesToSkip : Set<string>
val rng : Random
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit
  member NextDouble : unit -> float

--------------------
Random() : Random
Random(Seed: int) : Random
val digits : char []
val lowerLetters : char []
val upperLetters : char []
val getRandomChar : (char -> char)
val c : char
Multiple items
val char : value:'T -> char (requires member op_Explicit)

--------------------
type char = Char
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end
Char.IsDigit(c: char) : bool
Char.IsDigit(s: string, index: int) : bool
Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int
Char.IsLetter(c: char) : bool
Char.IsLetter(s: string, index: int) : bool
Char.IsLower(c: char) : bool
Char.IsLower(s: string, index: int) : bool
val randomize : (string -> String)
val str : string
Multiple items
val string : value:'T -> string

--------------------
type string = String
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

--------------------
String(value: nativeptr<char>) : String
String(value: nativeptr<sbyte>) : String
String(value: char []) : String
String(c: char, count: int) : String
String(value: nativeptr<char>, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int) : String
String(value: char [], startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : String
String.ToCharArray() : char []
String.ToCharArray(startIndex: int, length: int) : char []
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
val anonymize : (JsonValue -> JsonValue)
val json : JsonValue
type JsonValue =
  | String of string
  | Number of decimal
  | Float of float
  | Record of properties: (string * JsonValue) []
  | Array of elements: JsonValue []
  | Boolean of bool
  | Null
    member private PrepareRequest : httpMethod:string option * headers:#seq<string * string> option -> HttpRequestBody * (string * string) list * string
    member Request : url:string * ?httpMethod:string * ?headers:seq<string * string> -> HttpResponse
    member RequestAsync : url:string * ?httpMethod:string * ?headers:seq<string * string> -> Async<HttpResponse>
    override ToString : unit -> string
    member ToString : saveOptions:JsonSaveOptions -> string
    member WriteTo : w:TextWriter * saveOptions:JsonSaveOptions -> unit
    static member AsyncLoad : uri:string * ?encoding:Encoding -> Async<JsonValue>
    static member private JsonStringEncodeTo : w:TextWriter -> value:string -> unit
    static member Load : reader:TextReader -> JsonValue
    static member Load : stream:Stream -> JsonValue
    ...
union case JsonValue.String: string -> JsonValue
val s : string
member Set.Contains : value:'T -> bool
val typ : Type
Multiple items
namespace FSharp.Data.Runtime

--------------------
namespace System.Runtime
module StructuralInference

from FSharp.Data.Runtime
val inferPrimitiveType : cultureInfo:CultureInfo -> value:string -> Type
Multiple items
type CultureInfo =
  new : name:string -> CultureInfo + 3 overloads
  member Calendar : Calendar
  member ClearCachedData : unit -> unit
  member Clone : unit -> obj
  member CompareInfo : CompareInfo
  member CultureTypes : CultureTypes
  member DateTimeFormat : DateTimeFormatInfo with get, set
  member DisplayName : string
  member EnglishName : string
  member Equals : value:obj -> bool
  ...

--------------------
CultureInfo(name: string) : CultureInfo
CultureInfo(culture: int) : CultureInfo
CultureInfo(name: string, useUserOverride: bool) : CultureInfo
CultureInfo(culture: int, useUserOverride: bool) : CultureInfo
property CultureInfo.InvariantCulture: CultureInfo
val typeof<'T> : Type
Multiple items
type Guid =
  struct
    new : b:byte[] -> Guid + 4 overloads
    member CompareTo : value:obj -> int + 1 overload
    member Equals : o:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member ToByteArray : unit -> byte[]
    member ToString : unit -> string + 2 overloads
    static val Empty : Guid
    static member NewGuid : unit -> Guid
    static member Parse : input:string -> Guid
    static member ParseExact : input:string * format:string -> Guid
    ...
  end

--------------------
Guid ()
Guid(b: byte []) : Guid
Guid(g: string) : Guid
Guid(a: int, b: int16, c: int16, d: byte []) : Guid
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid.NewGuid() : Guid
namespace FSharp.Data.Runtime.StructuralTypes
Multiple items
union case Runtime.StructuralTypes.Bit0.Bit0: Runtime.StructuralTypes.Bit0

--------------------
type Bit0 = | Bit0
Multiple items
union case Runtime.StructuralTypes.Bit1.Bit1: Runtime.StructuralTypes.Bit1

--------------------
type Bit1 = | Bit1
Multiple items
type DateTime =
  struct
    new : ticks:int64 -> DateTime + 10 overloads
    member Add : value:TimeSpan -> DateTime
    member AddDays : value:float -> DateTime
    member AddHours : value:float -> DateTime
    member AddMilliseconds : value:float -> DateTime
    member AddMinutes : value:float -> DateTime
    member AddMonths : months:int -> DateTime
    member AddSeconds : value:float -> DateTime
    member AddTicks : value:int64 -> DateTime
    member AddYears : value:int -> DateTime
    ...
  end

--------------------
DateTime ()
   (+0 other overloads)
DateTime(ticks: int64) : DateTime
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Calendar) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Calendar) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : DateTime
   (+0 other overloads)
val prefix : string
String.StartsWith(value: string) : bool
String.StartsWith(value: string, comparisonType: StringComparison) : bool
String.StartsWith(value: string, ignoreCase: bool, culture: CultureInfo) : bool
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
union case JsonValue.Number: decimal -> JsonValue
val d : decimal
Decimal.ToString() : string
Decimal.ToString(provider: IFormatProvider) : string
Decimal.ToString(format: string) : string
Decimal.ToString(format: string, provider: IFormatProvider) : string
Multiple items
type Decimal =
  struct
    new : value:int -> decimal + 7 overloads
    member CompareTo : value:obj -> int + 1 overload
    member Equals : value:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val Zero : decimal
    static val One : decimal
    static val MinusOne : decimal
    static val MaxValue : decimal
    ...
  end

--------------------
Decimal ()
Decimal(value: int) : Decimal
Decimal(value: uint32) : Decimal
Decimal(value: int64) : Decimal
Decimal(value: uint64) : Decimal
Decimal(value: float32) : Decimal
Decimal(value: float) : Decimal
Decimal(bits: int []) : Decimal
Decimal(lo: int, mid: int, hi: int, isNegative: bool, scale: byte) : Decimal
Decimal.Parse(s: string) : decimal
Decimal.Parse(s: string, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: NumberStyles) : decimal
Decimal.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : decimal
union case JsonValue.Float: float -> JsonValue
val f : float
Double.ToString() : string
Double.ToString(provider: IFormatProvider) : string
Double.ToString(format: string) : string
Double.ToString(format: string, provider: IFormatProvider) : string
type Double =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MinValue : float
    static val MaxValue : float
    static val Epsilon : float
    static val NegativeInfinity : float
    static val PositiveInfinity : float
    ...
  end
Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: NumberStyles) : float
Double.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : float
union case JsonValue.Boolean: bool -> JsonValue
union case JsonValue.Null: JsonValue
union case JsonValue.Record: properties: (string * JsonValue) [] -> JsonValue
val props : (string * JsonValue) []
val key : string
val value : JsonValue
union case JsonValue.Array: elements: JsonValue [] -> JsonValue
Multiple items
val array : JsonValue []

--------------------
type 'T array = 'T []
static member JsonValue.Load : reader:IO.TextReader -> JsonValue
static member JsonValue.Load : stream:IO.Stream -> JsonValue
static member JsonValue.Load : uri:string * ?encoding:Text.Encoding -> JsonValue
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
val anonymizedJson : JsonValue
Fork me on GitHub