F# Data


F# Data: JSONの匿名化

このチュートリアルではJSONドキュメント ( JSONパーサーの記事 で説明した JsonValue 型で表現されるドキュメント) の匿名化機能を実装する方法を紹介します。 この機能はF# Dataライブラリに備わっている機能ではありませんが、 JSONドキュメントを再帰的に処理するだけで非常に簡単に実装できます。

JSON匿名化機能をコード内で利用したい場合には、 GitHubにあるソースコード をコピーして プロジェクトに加えるだけです。 この機能を頻繁に利用するようになり、F# Dataライブラリに取り込んでもらいたい ということであれば 機能のリクエスト を送信してください。

警告: この機能は単なるサンプルであるため、重要なデータを処理させないでください。

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
#r "../../../../bin/FSharp.Data.dll"
open System
open System.Globalization
open FSharp.Data

type JsonAnonymizer(?propertiesToSkip, ?valuesToSkip) = 

    let propertiesToSkip = Set.ofList (defaultArg propertiesToSkip [])
    let valuesToSkip = Set.ofList (defaultArg valuesToSkip [])

    let rng = Random()

    let digits = [| '0' .. '9' |]
    let lowerLetters = [| 'a' .. 'z' |]
    let upperLetters = [| 'A' .. 'Z' |]

    let getRandomChar (c:char) =
        if Char.IsDigit c then digits.[rng.Next(10)]
        elif Char.IsLetter c then
            if Char.IsLower c
            then lowerLetters.[rng.Next(26)]
            else upperLetters.[rng.Next(26)]
        else c

    let randomize (str:string) =
        String(str.ToCharArray() |> Array.map getRandomChar)

    let rec anonymize json =
        match json with
        | JsonValue.String s when valuesToSkip.Contains s -> json
        | JsonValue.String s ->
            let typ = Runtime.StructuralInference.inferPrimitiveType CultureInfo.InvariantCulture s
            if typ = typeof<Guid> then Guid.NewGuid().ToString()
            elif typ = typeof<Runtime.StructuralTypes.Bit0> || typ = typeof<Runtime.StructuralTypes.Bit1> then s
            elif typ = typeof<DateTime> then s
            else 
                let prefix, s =
                    if s.StartsWith "http://" then "http://", s.Substring("http://".Length)
                    elif s.StartsWith "https://" then "https://", s.Substring("https://".Length)
                    else "", s
                prefix + randomize s
            |> JsonValue.String
        | JsonValue.Number d -> 
            let typ = Runtime.StructuralInference.inferPrimitiveType CultureInfo.InvariantCulture (d.ToString())
            if typ = typeof<Runtime.StructuralTypes.Bit0> || typ = typeof<Runtime.StructuralTypes.Bit1>
            then json
            else d.ToString() |> randomize |> Decimal.Parse |> JsonValue.Number
        | JsonValue.Float f -> 
            f.ToString()
            |> randomize 
            |> Double.Parse 
            |> JsonValue.Float
        | JsonValue.Boolean _  | JsonValue.Null -> json
        | JsonValue.Record props -> 
            props 
            |> Array.map (fun (key, value) -> key, if propertiesToSkip.Contains key then value else anonymize value)
            |> JsonValue.Record
        | JsonValue.Array array -> 
            array 
            |> Array.map anonymize 
            |> JsonValue.Array

    member __.Anonymize json = anonymize json

let json = JsonValue.Load (__SOURCE_DIRECTORY__ + "../../data/TwitterStream.json")
printfn "%O" json

let anonymizedJson = (JsonAnonymizer ["lang"]).Anonymize json
printfn "%O" anonymizedJson

関連する記事

namespace System
namespace System.Globalization
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
Multiple items
type JsonAnonymizer =
  new : ?propertiesToSkip:string list * ?valuesToSkip:IComparable list -> JsonAnonymizer
  member Anonymize : json:'a -> 'b

Full name: JsonAnonymizer.JsonAnonymizer

--------------------
new : ?propertiesToSkip:string list * ?valuesToSkip:IComparable list -> JsonAnonymizer
val propertiesToSkip : string list option
val valuesToSkip : IComparable list option
val propertiesToSkip : Set<string>
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  member IsProperSupersetOf : otherSet:Set<'T> -> bool
  ...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
val ofList : elements:'T list -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.ofList
val defaultArg : arg:'T option -> defaultValue:'T -> 'T

Full name: Microsoft.FSharp.Core.Operators.defaultArg
val valuesToSkip : Set<IComparable>
val rng : Random
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit
  member NextDouble : unit -> float

Full name: System.Random

--------------------
Random() : unit
Random(Seed: int) : unit
val digits : char []
val lowerLetters : char []
val upperLetters : char []
val getRandomChar : (char -> char)
val c : char
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = Char

Full name: Microsoft.FSharp.Core.char
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end

Full name: System.Char
Char.IsDigit(c: char) : bool
Char.IsDigit(s: string, index: int) : bool
Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int
Char.IsLetter(c: char) : bool
Char.IsLetter(s: string, index: int) : bool
Char.IsLower(c: char) : bool
Char.IsLower(s: string, index: int) : bool
val randomize : (string -> String)
val str : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
String.ToCharArray() : char []
String.ToCharArray(startIndex: int, length: int) : char []
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
val anonymize : ('a -> 'b)
val json : 'a
member Set.Contains : value:'T -> bool
namespace System.Runtime
Multiple items
type CultureInfo =
  new : name:string -> CultureInfo + 3 overloads
  member Calendar : Calendar
  member ClearCachedData : unit -> unit
  member Clone : unit -> obj
  member CompareInfo : CompareInfo
  member CultureTypes : CultureTypes
  member DateTimeFormat : DateTimeFormatInfo with get, set
  member DisplayName : string
  member EnglishName : string
  member Equals : value:obj -> bool
  ...

Full name: System.Globalization.CultureInfo

--------------------
CultureInfo(name: string) : unit
CultureInfo(culture: int) : unit
CultureInfo(name: string, useUserOverride: bool) : unit
CultureInfo(culture: int, useUserOverride: bool) : unit
property CultureInfo.InvariantCulture: CultureInfo
val typeof<'T> : Type

Full name: Microsoft.FSharp.Core.Operators.typeof
Multiple items
type Guid =
  struct
    new : b:byte[] -> Guid + 4 overloads
    member CompareTo : value:obj -> int + 1 overload
    member Equals : o:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member ToByteArray : unit -> byte[]
    member ToString : unit -> string + 2 overloads
    static val Empty : Guid
    static member NewGuid : unit -> Guid
    static member Parse : input:string -> Guid
    static member ParseExact : input:string * format:string -> Guid
    ...
  end

Full name: System.Guid

--------------------
Guid()
Guid(b: byte []) : unit
Guid(g: string) : unit
Guid(a: int, b: int16, c: int16, d: byte []) : unit
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : unit
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : unit
Guid.NewGuid() : Guid
Multiple items
type DateTime =
  struct
    new : ticks:int64 -> DateTime + 10 overloads
    member Add : value:TimeSpan -> DateTime
    member AddDays : value:float -> DateTime
    member AddHours : value:float -> DateTime
    member AddMilliseconds : value:float -> DateTime
    member AddMinutes : value:float -> DateTime
    member AddMonths : months:int -> DateTime
    member AddSeconds : value:float -> DateTime
    member AddTicks : value:int64 -> DateTime
    member AddYears : value:int -> DateTime
    ...
  end

Full name: System.DateTime

--------------------
DateTime()
   (+0 other overloads)
DateTime(ticks: int64) : unit
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : unit
   (+0 other overloads)
Multiple items
type Decimal =
  struct
    new : value:int -> decimal + 7 overloads
    member CompareTo : value:obj -> int + 1 overload
    member Equals : value:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val Zero : decimal
    static val One : decimal
    static val MinusOne : decimal
    static val MaxValue : decimal
    ...
  end

Full name: System.Decimal

--------------------
Decimal()
Decimal(value: int) : unit
Decimal(value: uint32) : unit
Decimal(value: int64) : unit
Decimal(value: uint64) : unit
Decimal(value: float32) : unit
Decimal(value: float) : unit
Decimal(bits: int []) : unit
Decimal(lo: int, mid: int, hi: int, isNegative: bool, scale: byte) : unit
Decimal.Parse(s: string) : decimal
Decimal.Parse(s: string, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: NumberStyles) : decimal
Decimal.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : decimal
type Double =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MinValue : float
    static val MaxValue : float
    static val Epsilon : float
    static val NegativeInfinity : float
    static val PositiveInfinity : float
    ...
  end

Full name: System.Double
Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: NumberStyles) : float
Double.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : float
type Boolean =
  struct
    member CompareTo : obj:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val TrueString : string
    static val FalseString : string
    static member Parse : value:string -> bool
    static member TryParse : value:string * result:bool -> bool
  end

Full name: System.Boolean
type 'T array = 'T []

Full name: Microsoft.FSharp.Core.array<_>
member JsonAnonymizer.Anonymize : json:'a -> 'b

Full name: JsonAnonymizer.JsonAnonymizer.Anonymize
val json : obj

Full name: JsonAnonymizer.json
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val anonymizedJson : obj

Full name: JsonAnonymizer.anonymizedJson
Fork me on GitHub